## Introducing Thanksgiving Dinner Data

In [160]:
import pandas as pd
data = pd.read_csv('thanksgiving.csv', encoding = 'Latin-1')
data.head()

Unnamed: 0,RespondentID,Do you celebrate Thanksgiving?,What is typically the main dish at your Thanksgiving dinner?,What is typically the main dish at your Thanksgiving dinner? - Other (please specify),How is the main dish typically cooked?,How is the main dish typically cooked? - Other (please specify),What kind of stuffing/dressing do you typically have?,What kind of stuffing/dressing do you typically have? - Other (please specify),What type of cranberry saucedo you typically have?,What type of cranberry saucedo you typically have? - Other (please specify),...,Have you ever tried to meet up with hometown friends on Thanksgiving night?,"Have you ever attended a ""Friendsgiving?""",Will you shop any Black Friday sales on Thanksgiving Day?,Do you work in retail?,Will you employer make you work on Black Friday?,How would you describe where you live?,Age,What is your gender?,How much total combined money did all members of your HOUSEHOLD earn last year?,US Region
0,4337954960,Yes,Turkey,,Baked,,Bread-based,,,,...,Yes,No,No,No,,Suburban,18 - 29,Male,"$75,000 to $99,999",Middle Atlantic
1,4337951949,Yes,Turkey,,Baked,,Bread-based,,Other (please specify),Homemade cranberry gelatin ring,...,No,No,Yes,No,,Rural,18 - 29,Female,"$50,000 to $74,999",East South Central
2,4337935621,Yes,Turkey,,Roasted,,Rice-based,,Homemade,,...,Yes,Yes,Yes,No,,Suburban,18 - 29,Male,"$0 to $9,999",Mountain
3,4337933040,Yes,Turkey,,Baked,,Bread-based,,Homemade,,...,Yes,No,No,No,,Urban,30 - 44,Male,"$200,000 and up",Pacific
4,4337931983,Yes,Tofurkey,,Baked,,Bread-based,,Canned,,...,Yes,No,No,No,,Urban,30 - 44,Male,"$100,000 to $124,999",Pacific


In [161]:
data.columns

Index(['RespondentID', 'Do you celebrate Thanksgiving?',
       'What is typically the main dish at your Thanksgiving dinner?',
       'What is typically the main dish at your Thanksgiving dinner? - Other (please specify)',
       'How is the main dish typically cooked?',
       'How is the main dish typically cooked? - Other (please specify)',
       'What kind of stuffing/dressing do you typically have?',
       'What kind of stuffing/dressing do you typically have? - Other (please specify)',
       'What type of cranberry saucedo you typically have?',
       'What type of cranberry saucedo you typically have? - Other (please specify)',
       'Do you typically have gravy?',
       'Which of these side dishes aretypically served at your Thanksgiving dinner? Please select all that apply. - Brussel sprouts',
       'Which of these side dishes aretypically served at your Thanksgiving dinner? Please select all that apply. - Carrots',
       'Which of these side dishes aretypically served

## Filtering Out Rows From A Dataframe

In [162]:
data['Do you celebrate Thanksgiving?'].value_counts()

Yes    980
No      78
Name: Do you celebrate Thanksgiving?, dtype: int64

In [163]:
data = data[data['Do you celebrate Thanksgiving?'] == 'Yes']

## Using value_counts to Explore Main Dishes

In [164]:
data['What is typically the main dish at your Thanksgiving dinner?'].value_counts()

Turkey                    859
Other (please specify)     35
Ham/Pork                   29
Tofurkey                   20
Chicken                    12
Roast beef                 11
I don't know                5
Turducken                   3
Name: What is typically the main dish at your Thanksgiving dinner?, dtype: int64

In [165]:
data[data['What is typically the main dish at your Thanksgiving dinner?'] == 'Tofurkey']['Do you typically have gravy?']

4      Yes
33     Yes
69      No
72      No
77     Yes
145    Yes
175    Yes
218     No
243    Yes
275     No
393    Yes
399    Yes
571    Yes
594    Yes
628     No
774     No
820     No
837    Yes
860     No
953    Yes
Name: Do you typically have gravy?, dtype: object

## Filtering Out What Pies People Eat

In [166]:
apple_isnull = data['Which type of pie is typically served at your Thanksgiving dinner? Please select all that apply. - Apple'].isnull()
pumpkin_isnull = data['Which type of pie is typically served at your Thanksgiving dinner? Please select all that apply. - Pumpkin'].isnull()
pecan_isnull = data['Which type of pie is typically served at your Thanksgiving dinner? Please select all that apply. - Pecan'].isnull()
ate_pies = apple_isnull & pumpkin_isnull & pecan_isnull
ate_pies.value_counts()

False    876
True     104
dtype: int64

## Converting Age to Numeric

In [167]:
data["Age"].value_counts()

45 - 59    269
60+        258
30 - 44    235
18 - 29    185
Name: Age, dtype: int64

In [168]:
def extract_age(age_str):
    if pd.isnull(age_str):
        return None
    age_str = age_str.split(" ")[0]
    age_str = age_str.replace("+", "")
    return int(age_str)

data["int_age"] = data["Age"].apply(extract_age)
data["int_age"].describe()

count    947.000000
mean      40.089757
std       15.352014
min       18.000000
25%       30.000000
50%       45.000000
75%       60.000000
max       60.000000
Name: int_age, dtype: float64

### Findings

We have a rough approximation of ages becuase we extracted the lower bound only. Therfore, although the ages appear to be evenly distributed, there is downward skew.

## Converting Income to Numeric

In [169]:
data["How much total combined money did all members of your HOUSEHOLD earn last year?"].value_counts()

$25,000 to $49,999      166
$50,000 to $74,999      127
$75,000 to $99,999      127
Prefer not to answer    118
$100,000 to $124,999    109
$200,000 and up          76
$10,000 to $24,999       60
$0 to $9,999             52
$125,000 to $149,999     48
$150,000 to $174,999     38
$175,000 to $199,999     26
Name: How much total combined money did all members of your HOUSEHOLD earn last year?, dtype: int64

In [170]:
def extract_income(income_str):
    if pd.isnull(income_str):
        return None
    income_str = income_str.split(" ")[0]
    if income_str == "Prefer":
        return None
    income_str = income_str.replace(",", "")
    income_str = income_str.replace("$", "")
    return int(income_str)

data["int_income"] = data["How much total combined money did all members of your HOUSEHOLD earn last year?"].apply(extract_income)
data["int_income"].describe()

count       829.000000
mean      75965.018094
std       59068.636748
min           0.000000
25%       25000.000000
50%       75000.000000
75%      100000.000000
max      200000.000000
Name: int_income, dtype: float64

### Findings

Similar to before, we have a rough approximation of incomes becuase we extracted the lower bound only. The average income seems fairly high and the average distance away from the mean is also high.

## Correlating Travel Distance and Income

In [171]:
data[data["int_income"] < 150000]["How far will you travel for Thanksgiving?"].value_counts()


Thanksgiving is happening at my home--I won't travel at all                         281
Thanksgiving is local--it will take place in the town I live in                     203
Thanksgiving is out of town but not too far--it's a drive of a few hours or less    150
Thanksgiving is out of town and far away--I have to drive several hours or fly       55
Name: How far will you travel for Thanksgiving?, dtype: int64

In [172]:
data[data["int_income"] > 150000]["How far will you travel for Thanksgiving?"].value_counts()


Thanksgiving is happening at my home--I won't travel at all                         49
Thanksgiving is local--it will take place in the town I live in                     25
Thanksgiving is out of town but not too far--it's a drive of a few hours or less    16
Thanksgiving is out of town and far away--I have to drive several hours or fly      12
Name: How far will you travel for Thanksgiving?, dtype: int64

### Findings

It appears that more people with high income have Thanksgiving at home than people with low income. This may be because younger students, who don't have a high income, tend to go home, whereas parents, who have higher incomes, don't.

## Linking Friendship And Age

In [173]:
data.pivot_table(
    index="Have you ever tried to meet up with hometown friends on Thanksgiving night?", 
    columns='Have you ever attended a "Friendsgiving?"',
    values="int_age"
)

"Have you ever attended a ""Friendsgiving?""",No,Yes
Have you ever tried to meet up with hometown friends on Thanksgiving night?,Unnamed: 1_level_1,Unnamed: 2_level_1
No,42.283702,37.010526
Yes,41.47541,33.976744


In [174]:
data.pivot_table(
    index="Have you ever tried to meet up with hometown friends on Thanksgiving night?", 
    columns='Have you ever attended a "Friendsgiving?"',
    values="int_income"
)

"Have you ever attended a ""Friendsgiving?""",No,Yes
Have you ever tried to meet up with hometown friends on Thanksgiving night?,Unnamed: 1_level_1,Unnamed: 2_level_1
No,78914.549654,72894.736842
Yes,78750.0,66019.736842


### Findings

Younger people are more likely to attend a Friendsgiving and meet up with friends on Thanksgiving.

## Next Steps
-Figure out the most common dessert people eat. <br>
-Figure out the most common complete meal people eat. <br>
-Identify how many people work on Thanksgiving. <br>
-Find regional patterns in the dinner menus. <br>
-Find age, gender, and income based patterns in dinner menus.

In [175]:
for each in data.columns:
    print(each)

RespondentID
Do you celebrate Thanksgiving?
What is typically the main dish at your Thanksgiving dinner?
What is typically the main dish at your Thanksgiving dinner? - Other (please specify)
How is the main dish typically cooked?
How is the main dish typically cooked? - Other (please specify)
What kind of stuffing/dressing do you typically have?
What kind of stuffing/dressing do you typically have? - Other (please specify)
What type of cranberry saucedo you typically have?
What type of cranberry saucedo you typically have? - Other (please specify)
Do you typically have gravy?
Which of these side dishes aretypically served at your Thanksgiving dinner? Please select all that apply. - Brussel sprouts
Which of these side dishes aretypically served at your Thanksgiving dinner? Please select all that apply. - Carrots
Which of these side dishes aretypically served at your Thanksgiving dinner? Please select all that apply. - Cauliflower
Which of these side dishes aretypically served at your Th

## Most Common Desert

In [176]:
def most_common(string):    
    import re
    comparison = 0
    for each in data.columns:
        if re.search(string, each) is not None:
            responses = data[each].value_counts()
            val = responses[0]
            if val > comparison:
                comparison = responses[0]
                common = responses
    return common
most_common('^Which of these desserts')

None    295
Name: Which of these desserts do you typically have at Thanksgiving dinner? Please select all that apply.   - None, dtype: int64

## Most Common Complete Meal

In [177]:
common_dish = most_common('^What is typically the main dish')
common_dish

Turkey                    859
Other (please specify)     35
Ham/Pork                   29
Tofurkey                   20
Chicken                    12
Roast beef                 11
I don't know                5
Turducken                   3
Name: What is typically the main dish at your Thanksgiving dinner?, dtype: int64

In [178]:
most_common('^How is the main dish typically cooked?')


Baked                     481
Roasted                   378
Other (please specify)     51
Fried                      47
I don't know               17
Name: How is the main dish typically cooked?, dtype: int64

In [179]:
most_common('^What kind of stuffing/dressing do you typically have?')

Bread-based               836
None                       60
Rice-based                 42
Other (please specify)     36
Name: What kind of stuffing/dressing do you typically have?, dtype: int64

In [180]:
most_common('^What type of cranberry saucedo you typically have?')

Canned                    502
Homemade                  301
None                      146
Other (please specify)     25
Name: What type of cranberry saucedo you typically have?, dtype: int64

In [181]:
most_common('Do you typically have gravy?')

Yes    892
No      82
Name: Do you typically have gravy?, dtype: int64

In [182]:
most_common('^Which of these side dishes aretypically served at your Thanksgiving dinner?')

Mashed potatoes    817
Name: Which of these side dishes aretypically served at your Thanksgiving dinner? Please select all that apply. - Mashed potatoes, dtype: int64

In [183]:
most_common('^Which type of pie is typically served at your Thanksgiving dinner?')

Pumpkin    729
Name: Which type of pie is typically served at your Thanksgiving dinner? Please select all that apply. - Pumpkin, dtype: int64

### Findings

The most common Thanksgiving meal is a baked turkey with a bread-based stuffing/dressing, canned cranberry sauce, mashed potatoes, gravy, pumpking pie, and no other desert.

## How Many People Work on Thanksgiving?

In [184]:
data['Will you employer make you work on Black Friday?'].value_counts()

Yes              43
No               20
Doesn't apply     7
Name: Will you employer make you work on Black Friday?, dtype: int64

## Regional Patterns in Dinner Menus

In [190]:
data.pivot_table(index = 'US Region', columns = ['What is typically the main dish at your Thanksgiving dinner?', 'What kind of stuffing/dressing do you typically have?'] )

Unnamed: 0_level_0,RespondentID,RespondentID,RespondentID,RespondentID,RespondentID,RespondentID,RespondentID,RespondentID,RespondentID,RespondentID,...,int_income,int_income,int_income,int_income,int_income,int_income,int_income,int_income,int_income,int_income
What is typically the main dish at your Thanksgiving dinner?,Chicken,Chicken,Chicken,Ham/Pork,Ham/Pork,Ham/Pork,I don't know,I don't know,Other (please specify),Other (please specify),...,Roast beef,Roast beef,Tofurkey,Tofurkey,Tofurkey,Turducken,Turkey,Turkey,Turkey,Turkey
What kind of stuffing/dressing do you typically have?,Bread-based,None,Rice-based,Bread-based,None,Rice-based,None,Other (please specify),Bread-based,None,...,None,Rice-based,Bread-based,None,Rice-based,Bread-based,Bread-based,None,Other (please specify),Rice-based
US Region,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
East North Central,,,,4336886000.0,4336949000.0,,,,4336479000.0,4337042000.0,...,,,,,,,71946.902655,50000.0,87500.0,68750.0
East South Central,,,,4336910000.0,,,,,4337197000.0,4336830000.0,...,,,,,,,70697.674419,10000.0,66666.666667,
Middle Atlantic,4336661000.0,,,4336872000.0,,,,,4336605000.0,4336471000.0,...,50000.0,,81250.0,25000.0,,,95000.0,62500.0,87500.0,40000.0
Mountain,,,4336382000.0,4336759000.0,,,,,,,...,,,137500.0,,,,88666.666667,,,0.0
New England,4336066000.0,,,,,,,,,4337188000.0,...,,,25000.0,,,,88289.473684,,125000.0,
Pacific,,,,4336514000.0,4336762000.0,,,4336162000.0,4336793000.0,4336810000.0,...,,,55000.0,,25000.0,200000.0,78928.571429,22500.0,100000.0,50000.0
South Atlantic,4336491000.0,4336092000.0,,4336812000.0,4336486000.0,,,,4336742000.0,4336146000.0,...,50000.0,25000.0,112500.0,,,,72697.841727,100000.0,86000.0,35000.0
West North Central,,,4336352000.0,4337000000.0,,4335955000.0,4336084000.0,,4336249000.0,4336752000.0,...,,,75000.0,25000.0,,,70744.680851,33333.333333,,
West South Central,4336209000.0,,,4337632000.0,,,,,4336137000.0,4337332000.0,...,,,67500.0,,,,82131.147541,100000.0,57000.0,0.0


## Age, Gender, and Income Based Patterns in Dinner Menus

In [192]:
data.pivot_table(index = ['Age', 'What is your gender?', 'int_income'], columns = ['What is typically the main dish at your Thanksgiving dinner?', 'What kind of stuffing/dressing do you typically have?'] )

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,RespondentID,RespondentID,RespondentID,RespondentID,RespondentID,RespondentID,RespondentID,RespondentID,RespondentID,RespondentID,...,int_age,int_age,int_age,int_age,int_age,int_age,int_age,int_age,int_age,int_age
Unnamed: 0_level_1,Unnamed: 1_level_1,What is typically the main dish at your Thanksgiving dinner?,Chicken,Chicken,Chicken,Ham/Pork,Ham/Pork,Ham/Pork,I don't know,I don't know,I don't know,Other (please specify),...,Roast beef,Roast beef,Tofurkey,Tofurkey,Tofurkey,Turducken,Turkey,Turkey,Turkey,Turkey
Unnamed: 0_level_2,Unnamed: 1_level_2,What kind of stuffing/dressing do you typically have?,Bread-based,None,Rice-based,Bread-based,None,Rice-based,Bread-based,None,Other (please specify),Bread-based,...,None,Rice-based,Bread-based,None,Rice-based,Bread-based,Bread-based,None,Other (please specify),Rice-based
Age,What is your gender?,int_income,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3,Unnamed: 22_level_3,Unnamed: 23_level_3
18 - 29,Female,0.0,,,,,,,,,,,...,,,,,,,18.0,18.0,,18.0
18 - 29,Female,10000.0,,,,4.336117e+09,,,,,,,...,,,18.0,,,,18.0,,,
18 - 29,Female,25000.0,,,,,,,,,,4.336137e+09,...,,,18.0,18.0,,,18.0,18.0,,
18 - 29,Female,50000.0,,,,,,,,,4.336162e+09,,...,,,,,,,18.0,,,
18 - 29,Female,75000.0,,,,,,,,,,,...,,,,,,,18.0,18.0,,18.0
18 - 29,Female,100000.0,,,,4.336835e+09,,,,,,,...,,,,,,,18.0,18.0,,
18 - 29,Female,125000.0,,,,,,,,,,,...,,,,,,,18.0,,,
18 - 29,Female,150000.0,,,,,,,,,,,...,,,,,,,18.0,18.0,,
18 - 29,Female,175000.0,,,,,,,,,,,...,,,,,,,18.0,,,
18 - 29,Female,200000.0,,,,,,,,,,,...,,,,,,,18.0,,,
