In [18]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scipy.stats as st
from scipy.stats import linregress
from sklearn import datasets

from pathlib import Path
from sklearn.metrics import balanced_accuracy_score, confusion_matrix, classification_report

# Study data files
coffee_path = "GACTT_RESULTS_ANONYMIZED_v2.csv"

# Read data
coffee_data = pd.read_csv(coffee_path)

# Make a DataFrame
origin_coffee_df = pd.DataFrame(coffee_data)

# Display the data table for preview
origin_coffee_df.sample(10)

Unnamed: 0,Submission ID,What is your age?,How many cups of coffee do you typically drink per day?,Where do you typically drink coffee?,Where do you typically drink coffee? (At home),Where do you typically drink coffee? (At the office),Where do you typically drink coffee? (On the go),Where do you typically drink coffee? (At a cafe),Where do you typically drink coffee? (None of these),How do you brew coffee at home?,...,Approximately how much have you spent on coffee equipment in the past 5 years?,Do you feel like you’re getting good value for your money with regards to your coffee equipment?,Gender,Gender (please specify),Education Level,Ethnicity/Race,Ethnicity/Race (please specify),Employment Status,Number of Children,Political Affiliation
3602,42PN8B,25-34 years old,3,At home,True,False,False,False,False,"Pour over, Espresso",...,$300-$500,Yes,Male,,Doctorate or professional degree,White/Caucasian,,Employed full-time,,Republican
1807,aBrMLB,25-34 years old,1,At home,True,False,False,False,False,Coffee brewing machine (e.g. Mr. Coffee),...,Less than $20,Yes,Female,,Some college or associate's degree,Hispanic/Latino,,Employed full-time,1.0,Democrat
3261,Rz8GXv,25-34 years old,Less than 1,"On the go, At a cafe",False,False,True,True,False,,...,$20-$50,Yes,Female,,Bachelor's degree,White/Caucasian,,Employed full-time,,No affiliation
344,BGbJ9Q,25-34 years old,2,"At the office, At a cafe, At home, On the go",True,True,True,True,False,Coffee brewing machine (e.g. Mr. Coffee),...,$20-$50,Yes,Female,,Master's degree,White/Caucasian,,Employed full-time,,Democrat
1926,zKQ9ME,55-64 years old,1,At the office,False,True,False,False,False,,...,$300-$500,Yes,Female,,Bachelor's degree,White/Caucasian,,Employed full-time,3.0,No affiliation
3378,oAlD8M,25-34 years old,Less than 1,None of these,False,False,False,False,True,,...,Less than $20,Yes,Male,,Master's degree,White/Caucasian,,Employed full-time,,Independent
295,g5N8pl,25-34 years old,2,"At the office, At home",True,True,False,False,False,Pour over,...,$300-$500,Yes,Male,,,,,,,
3014,1rbjQ4,18-24 years old,1,At home,True,False,False,False,False,Pour over,...,$300-$500,No,Male,,Some college or associate's degree,White/Caucasian,,Student,,Democrat
3027,RzZp4P,25-34 years old,1,"At home, At a cafe",True,False,False,True,False,"Pour over, Coffee brewing machine (e.g. Mr. Co...",...,"More than $1,000",No,Female,,Bachelor's degree,,,Unemployed,,Democrat
671,24xGAj,25-34 years old,2,"At home, At a cafe",True,False,False,True,False,"Pour over, Other",...,"More than $1,000",Yes,Male,,Some college or associate's degree,White/Caucasian,,Employed full-time,,Democrat


In [19]:
column_list =  pd.DataFrame(list(origin_coffee_df.columns.values))

print(column_list)

                                                     0
0                                        Submission ID
1                                    What is your age?
2    How many cups of coffee do you typically drink...
3                 Where do you typically drink coffee?
4       Where do you typically drink coffee? (At home)
..                                                 ...
108                                     Ethnicity/Race
109                    Ethnicity/Race (please specify)
110                                  Employment Status
111                                 Number of Children
112                              Political Affiliation

[113 rows x 1 columns]


In [20]:
coffee_df = origin_coffee_df.drop(columns=['Submission ID', 'Ethnicity/Race (please specify)',
                                           'Gender (please specify)',
                                           'Political Affiliation'])

coffee_df = coffee_df.rename(columns={"What is your age?": "Age",
                          'How many cups of coffee do you typically drink per day?': 'Cups Per Day',
                          'Where do you typically drink coffee? (At home)': 'Typically Home?',
                          'Where do you typically drink coffee? (At the office)': 'Typically Office?',
                          'Where do you typically drink coffee? (At a cafe)': 'Typically at Cafe',
                          'Where do you typically drink coffee? (None of these)': 'None of These?',
                          'Where do you typically drink coffee? (On the go)': 'Typically on the go?',
                          'How do you brew coffee at home?': 'Brew Methods',
                          'How do you brew coffee at home? (Pour over)': 'Pour Over?',
                          "What is the most you've ever paid for a cup of coffee?":'Most Paid'})



In [21]:
coffee_df.sample(10)

Unnamed: 0,Age,Cups Per Day,Where do you typically drink coffee?,Typically Home?,Typically Office?,Typically on the go?,Typically at Cafe,None of These?,Brew Methods,Pour Over?,...,Most Paid,What is the most you'd ever be willing to pay for a cup of coffee?,Do you feel like you’re getting good value for your money when you buy coffee at a cafe?,Approximately how much have you spent on coffee equipment in the past 5 years?,Do you feel like you’re getting good value for your money with regards to your coffee equipment?,Gender,Education Level,Ethnicity/Race,Employment Status,Number of Children
3322,25-34 years old,Less than 1,At home,True,False,False,False,False,"Instant coffee, Other",False,...,$6-$8,$8-$10,No,Less than $20,Yes,Male,Bachelor's degree,Other (please specify),Employed part-time,
1055,25-34 years old,1,"At a cafe, At home",True,False,False,True,False,Other,False,...,$10-$15,$8-$10,Yes,$50-$100,Yes,Female,Bachelor's degree,White/Caucasian,Employed full-time,
1518,25-34 years old,1,"At home, At the office",True,True,False,False,False,French press,False,...,$4-$6,$6-$8,No,$300-$500,Yes,Male,Bachelor's degree,White/Caucasian,Employed full-time,
2004,35-44 years old,1,"At the office, At home",True,True,False,False,False,French press,False,...,$8-$10,$10-$15,No,$100-$300,Yes,Female,Bachelor's degree,White/Caucasian,Employed part-time,
1172,25-34 years old,2,At home,True,False,False,False,False,Espresso,False,...,$10-$15,$15-$20,Yes,"More than $1,000",Yes,Non-binary,Bachelor's degree,White/Caucasian,Employed full-time,
3435,18-24 years old,1,"At home, On the go, At the office",True,True,True,False,False,"French press, Espresso, Coffee brewing machine...",False,...,$6-$8,$8-$10,No,$100-$300,Yes,Female,Some college or associate's degree,Hispanic/Latino,Employed full-time,
3931,45-54 years old,More than 4,At home,True,False,False,False,False,"Pour over, Coffee brewing machine (e.g. Mr. Co...",True,...,$8-$10,$4-$6,Yes,"More than $1,000",Yes,Male,High school graduate,White/Caucasian,Employed full-time,3.0
3429,25-34 years old,2,"At a cafe, At home",True,False,False,True,False,"Pour over, Espresso",True,...,$6-$8,$10-$15,No,$300-$500,Yes,Male,Bachelor's degree,Other (please specify),Employed part-time,
27,,,,False,False,False,False,False,,,...,,,,,,,,,,
498,25-34 years old,1,At the office,False,True,False,False,False,,,...,$8-$10,$8-$10,Yes,$50-$100,Yes,Female,Some college or associate's degree,White/Caucasian,Employed full-time,


In [22]:
coffee_column_list = pd.DataFrame(list(coffee_df.columns.values))

coffee_column_list.to_csv('coffee_columns.csv')

In [23]:
coffee_analysis_df = coffee_df[['Age', 'Cups Per Day', 'Where do you typically drink coffee?', 'Typically Home?', 
                                'Typically Office?', 'Typically on the go?', 'Typically at Cafe', 'Brew Methods', 'Pour Over?', 
                                'How do you brew coffee at home? (French press)', 'How do you brew coffee at home? (Espresso)', 
                                'How do you brew coffee at home? (Coffee brewing machine (e.g. Mr. Coffee))', 'How do you brew coffee at home? (Pod/capsule machine (e.g. Keurig/Nespresso))', 
                                'How do you brew coffee at home? (Instant coffee)', 'How do you brew coffee at home? (Bean-to-cup machine)', 
                                'How do you brew coffee at home? (Cold brew)', 
                                'How do you brew coffee at home? (Coffee extract (e.g. Cometeer))', 'How do you brew coffee at home? (Other)', 'What is your favorite coffee drink?', 
                                'How strong do you like your coffee?', 'What roast level of coffee do you prefer?', 'How much caffeine do you like in your coffee?', 
                                'Between Coffee A, Coffee B, and Coffee C which did you prefer?', 'Do you work from home or in person?', 'Why do you drink coffee?', 'Gender', 'Ethnicity/Race', 'Employment Status']].copy()


In [24]:
coffee_analysis_df = coffee_analysis_df.rename(columns={'How do you brew coffee at home? (French press)': 'French Press?',
                                   'How do you brew coffee at home? (Espresso)': 'Espresso?',
                                   'How do you brew coffee at home? (Coffee brewing machine (e.g. Mr. Coffee))': 'Mr. Coffee',
                                   'How do you brew coffee at home? (Pod/capsule machine (e.g. Keurig/Nespresso))': 'Pods',
                                   'How do you brew coffee at home? (Instant coffee)': 'Instant',
                                   'How do you brew coffee at home? (Bean-to-cup machine)': 'Bean to Cup',
                                   'How do you brew coffee at home? (Cold brew)': 'Cold Brew',
                                   'What is your favorite coffee drink?': 'Favorite Form',
                                   'How strong do you like your coffee?': 'Strength?',
                                   'What roast level of coffee do you prefer?': 'Roast Preference',
                                   'How much caffeine do you like in your coffee?': 'How Much Caffeine?',
                                   'Between Coffee A, Coffee B, and Coffee C which did you prefer?': 'A, B, C',
                                   'Do you work from home or in person?': 'WFH',
                                   'Why do you drink coffee?': 'Why'})

coffee_analysis_df = coffee_analysis_df.drop(columns=['How do you brew coffee at home? (Coffee extract (e.g. Cometeer))', 
                                                      'How do you brew coffee at home? (Other)'])

In [25]:
coffee_analysis_df.sample(10)

Unnamed: 0,Age,Cups Per Day,Where do you typically drink coffee?,Typically Home?,Typically Office?,Typically on the go?,Typically at Cafe,Brew Methods,Pour Over?,French Press?,...,Favorite Form,Strength?,Roast Preference,How Much Caffeine?,"A, B, C",WFH,Why,Gender,Ethnicity/Race,Employment Status
2227,18-24 years old,4,At home,True,False,False,False,Pour over,True,False,...,Regular drip coffee,Somewhat strong,Light,Full caffeine,Coffee B,I do a mix of both,"It tastes good, I need the caffeine, I need th...",Other (please specify),White/Caucasian,Employed full-time
2522,35-44 years old,2,"At home, At the office, On the go, At a cafe",True,True,True,True,"Pour over, French press, Cold brew, Other",True,True,...,Other,Somewhat strong,Medium,Half caff,,,,,,
561,25-34 years old,1,At home,True,False,False,False,"Pour over, Espresso",True,False,...,Espresso,Somewhat strong,Light,Full caffeine,Coffee A,I primarily work from home,"It tastes good, I need the ritual",Male,White/Caucasian,Employed full-time
2283,25-34 years old,2,At home,True,False,False,False,"Coffee brewing machine (e.g. Mr. Coffee), Espr...",False,False,...,Cortado,Medium,Medium,Full caffeine,Coffee C,I do a mix of both,It tastes good,Female,White/Caucasian,Employed full-time
2020,25-34 years old,2,"At home, At a cafe",True,False,False,True,"Pour over, Espresso",True,False,...,Cappuccino,Medium,Light,Full caffeine,Coffee A,I primarily work from home,"It tastes good, I need the caffeine, I need th...",Male,White/Caucasian,Employed full-time
2601,25-34 years old,1,At home,True,False,False,False,Coffee brewing machine (e.g. Mr. Coffee),False,False,...,Americano,Somewhat strong,Medium,Full caffeine,Coffee C,I primarily work in person,It tastes good,Male,White/Caucasian,Employed full-time
1434,25-34 years old,1,At home,True,False,False,False,"Pour over, Coffee extract (e.g. Cometeer)",True,False,...,Pourover,Medium,Light,Full caffeine,Coffee A,I primarily work from home,It tastes good,Female,White/Caucasian,Employed part-time
3810,18-24 years old,1,At the office,False,True,False,False,,,,...,Latte,Somewhat light,Light,Decaf,Coffee B,I do a mix of both,It tastes good,Female,,Employed full-time
2835,35-44 years old,Less than 1,"At a cafe, At home",True,False,False,True,Pour over,True,False,...,Blended drink (e.g. Frappuccino),Somewhat strong,Light,Full caffeine,Coffee A,I primarily work in person,It tastes good,Female,White/Caucasian,Employed part-time
1411,18-24 years old,2,At home,True,False,False,False,"Espresso, Pour over, French press, Coffee brew...",True,True,...,Pourover,Medium,Medium,Full caffeine,Coffee A,I do a mix of both,"It tastes good, I need the ritual",Male,White/Caucasian,Employed full-time


In [26]:
coffee_Analysis_column_list = pd.DataFrame(list(coffee_analysis_df.columns.values))

coffee_Analysis_column_list.to_csv('coffee_analysis_df.csv')

In [27]:
wfh_df = coffee_analysis_df[['Age', 'WFH', 'Cups Per Day', 'Brew Methods', 'Strength?', 'How Much Caffeine?', 'Why', 'Favorite Form']].copy()

wfh_df.sample(10)

Unnamed: 0,Age,WFH,Cups Per Day,Brew Methods,Strength?,How Much Caffeine?,Why,Favorite Form
2125,45-54 years old,I primarily work in person,3,Coffee brewing machine (e.g. Mr. Coffee),Somewhat strong,Full caffeine,"It tastes good, I need the ritual",Americano
1869,25-34 years old,I primarily work from home,2,"Espresso, Pour over",Somewhat strong,Full caffeine,"It tastes good, I need the caffeine, I need th...",Espresso
1150,25-34 years old,I primarily work in person,1,"Coffee brewing machine (e.g. Mr. Coffee), Other",Somewhat strong,Full caffeine,It tastes good,Cappuccino
3280,25-34 years old,I primarily work from home,Less than 1,"Pour over, Espresso, Cold brew",Very strong,Full caffeine,It tastes good,Latte
1875,35-44 years old,I primarily work in person,1,Cold brew,Medium,Full caffeine,It tastes good,Blended drink (e.g. Frappuccino)
922,35-44 years old,I primarily work from home,1,Espresso,Medium,Full caffeine,"It tastes good, I need the caffeine, It makes ...",Latte
3866,25-34 years old,I primarily work in person,2,"Pour over, Espresso",Somewhat strong,Full caffeine,"It tastes good, I need the caffeine, I need th...",Regular drip coffee
2323,35-44 years old,I primarily work from home,1,Other,Very strong,Full caffeine,"It tastes good, I need the caffeine, I need th...",Cortado
2598,25-34 years old,I primarily work in person,3,"Pour over, Espresso, Coffee extract (e.g. Come...",Somewhat strong,Full caffeine,"It tastes good, I need the caffeine, I need th...",Pourover
710,25-34 years old,I do a mix of both,3,"Pour over, Espresso",Somewhat strong,Full caffeine,"It tastes good, I need the caffeine, I need th...",Americano


In [28]:
wfh_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4042 entries, 0 to 4041
Data columns (total 8 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   Age                 4011 non-null   object
 1   WFH                 3524 non-null   object
 2   Cups Per Day        3949 non-null   object
 3   Brew Methods        3657 non-null   object
 4   Strength?           3916 non-null   object
 5   How Much Caffeine?  3917 non-null   object
 6   Why                 3568 non-null   object
 7   Favorite Form       3980 non-null   object
dtypes: object(8)
memory usage: 252.8+ KB


In [29]:
#Drop NaN rows, we want complete results

wfh_nona_df = wfh_df.dropna()

In [30]:
#Values of cups "more than 4" will be labeled as 5 in our dataset, and less than one will be labled as zero, less than 1 cup so they have had zero full cups 

wfh_nona_df['Cups Per Day'] = wfh_nona_df['Cups Per Day'].str.replace('More than 4', '5')
wfh_nona_df['Cups Per Day'] = wfh_nona_df['Cups Per Day'].str.replace('Less than 1', '0')


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wfh_nona_df['Cups Per Day'] = wfh_nona_df['Cups Per Day'].str.replace('More than 4', '5')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wfh_nona_df['Cups Per Day'] = wfh_nona_df['Cups Per Day'].str.replace('Less than 1', '0')


In [31]:
wfh_nona_df['Cups Per Day'] = pd.to_numeric(wfh_nona_df['Cups Per Day'], errors='raise')

wfh_nona_df = wfh_nona_df.dropna()

display(wfh_nona_df.sample(10))
display(wfh_nona_df.info())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wfh_nona_df['Cups Per Day'] = pd.to_numeric(wfh_nona_df['Cups Per Day'], errors='raise')


Unnamed: 0,Age,WFH,Cups Per Day,Brew Methods,Strength?,How Much Caffeine?,Why,Favorite Form
1321,55-64 years old,I primarily work in person,2,"French press, Coffee brewing machine (e.g. Mr....",Medium,Full caffeine,"It tastes good, I need the caffeine, I need th...",Regular drip coffee
2845,25-34 years old,I do a mix of both,2,Pour over,Somewhat strong,Full caffeine,"It tastes good, I need the ritual",Cappuccino
1596,45-54 years old,I primarily work from home,2,Coffee brewing machine (e.g. Mr. Coffee),Medium,Half caff,"It tastes good, I need the ritual",Cappuccino
380,45-54 years old,I do a mix of both,3,"Pod/capsule machine (e.g. Keurig/Nespresso), P...",Medium,Full caffeine,"I need the caffeine, It tastes good",Pourover
862,25-34 years old,I primarily work from home,0,Espresso,Somewhat strong,Full caffeine,"Other, It tastes good",Cappuccino
2758,25-34 years old,I primarily work from home,2,Pour over,Somewhat light,Full caffeine,"It tastes good, I need the caffeine, I need th...",Pourover
3097,35-44 years old,I primarily work from home,2,"Espresso, Pour over",Somewhat strong,Full caffeine,It tastes good,Espresso
1329,25-34 years old,I primarily work in person,2,"Pour over, Espresso",Somewhat light,Full caffeine,"I need the ritual, It tastes good",Pourover
3320,35-44 years old,I primarily work from home,2,"Pour over, Espresso",Medium,Full caffeine,"It tastes good, I need the caffeine, I need th...",Cortado
3593,25-34 years old,I primarily work from home,0,"Pour over, French press",Somewhat strong,Full caffeine,"I need the caffeine, It tastes good",Americano


<class 'pandas.core.frame.DataFrame'>
Int64Index: 3207 entries, 34 to 4041
Data columns (total 8 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   Age                 3207 non-null   object
 1   WFH                 3207 non-null   object
 2   Cups Per Day        3207 non-null   int64 
 3   Brew Methods        3207 non-null   object
 4   Strength?           3207 non-null   object
 5   How Much Caffeine?  3207 non-null   object
 6   Why                 3207 non-null   object
 7   Favorite Form       3207 non-null   object
dtypes: int64(1), object(7)
memory usage: 225.5+ KB


None

In [32]:
display(wfh_nona_df.sample(10))
display(wfh_nona_df.info())


Unnamed: 0,Age,WFH,Cups Per Day,Brew Methods,Strength?,How Much Caffeine?,Why,Favorite Form
118,25-34 years old,I primarily work in person,2,"Pour over, Espresso",Medium,Full caffeine,I need the ritual,Pourover
3057,45-54 years old,I do a mix of both,1,"Espresso, Pod/capsule machine (e.g. Keurig/Nes...",Medium,Full caffeine,"I need the caffeine, I need the ritual",Latte
3858,25-34 years old,I do a mix of both,1,"Pour over, Other",Medium,Full caffeine,"It tastes good, I need the ritual",Pourover
2578,25-34 years old,I primarily work in person,1,Other,Somewhat strong,Full caffeine,"It tastes good, I need the caffeine",Cortado
1137,35-44 years old,I primarily work in person,2,"Coffee brewing machine (e.g. Mr. Coffee), Pour...",Somewhat strong,Full caffeine,"It tastes good, I need the caffeine, I need th...",Pourover
3630,35-44 years old,I primarily work from home,2,"French press, Espresso, Coffee brewing machine...",Somewhat strong,Full caffeine,"It tastes good, I need the caffeine, I need th...",Latte
2401,25-34 years old,I do a mix of both,2,"Pour over, Espresso, Instant coffee, Bean-to-c...",Somewhat strong,Full caffeine,"I need the caffeine, It tastes good, It makes ...",Pourover
3524,25-34 years old,I do a mix of both,1,"Pour over, Espresso, Pod/capsule machine (e.g....",Somewhat strong,Full caffeine,"It tastes good, I need the caffeine",Pourover
3795,25-34 years old,I primarily work from home,2,Pour over,Medium,Full caffeine,"It tastes good, I need the ritual",Pourover
2608,25-34 years old,I primarily work from home,2,"Cold brew, Espresso, French press",Very strong,Full caffeine,"It tastes good, I need the caffeine, It makes ...",Americano


<class 'pandas.core.frame.DataFrame'>
Int64Index: 3207 entries, 34 to 4041
Data columns (total 8 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   Age                 3207 non-null   object
 1   WFH                 3207 non-null   object
 2   Cups Per Day        3207 non-null   int64 
 3   Brew Methods        3207 non-null   object
 4   Strength?           3207 non-null   object
 5   How Much Caffeine?  3207 non-null   object
 6   Why                 3207 non-null   object
 7   Favorite Form       3207 non-null   object
dtypes: int64(1), object(7)
memory usage: 225.5+ KB


None

In [33]:
#Categories are <18 (0), 18-24 (18), 25-34 (25), 35-44 (35), 45-54 (45), 55-64 (55), 65< (65)

wfh_nona_df['Age'] = wfh_nona_df['Age'].str.replace('<18 years old', '0')
wfh_nona_df['Age'] = wfh_nona_df['Age'].str.replace('18-24 years old', '18')
wfh_nona_df['Age'] = wfh_nona_df['Age'].str.replace('25-34 years old', '25')
wfh_nona_df['Age'] = wfh_nona_df['Age'].str.replace('35-44 years old', '35')
wfh_nona_df['Age'] = wfh_nona_df['Age'].str.replace('45-54 years old', '45')
wfh_nona_df['Age'] = wfh_nona_df['Age'].str.replace('55-64 years old', '55')
wfh_nona_df['Age'] = wfh_nona_df['Age'].str.replace('>65 years old', '65')

In [35]:
wfh_nona_df.sample(10)

Unnamed: 0,Age,WFH,Cups Per Day,Brew Methods,Strength?,How Much Caffeine?,Why,Favorite Form
1026,35,I primarily work from home,2,Pour over,Somewhat strong,Full caffeine,"It tastes good, I need the ritual",Americano
3483,25,I do a mix of both,1,"Espresso, Pour over",Very strong,Full caffeine,"It tastes good, I need the caffeine, I need th...",Pourover
1221,25,I primarily work in person,3,"Pod/capsule machine (e.g. Keurig/Nespresso), E...",Somewhat strong,Full caffeine,"It tastes good, I need the caffeine",Espresso
2965,25,I do a mix of both,2,Other,Somewhat strong,Full caffeine,It tastes good,Latte
2474,35,I do a mix of both,2,Pour over,Medium,Full caffeine,"It tastes good, Other",Pourover
2098,25,I primarily work from home,3,Espresso,Somewhat strong,Full caffeine,"It tastes good, I need the ritual",Latte
116,25,I do a mix of both,0,Pour over,Weak,Decaf,It tastes good,Latte
537,25,I do a mix of both,2,"Pour over, Espresso",Somewhat strong,Full caffeine,"It tastes good, I need the caffeine, I need th...",Regular drip coffee
101,25,I primarily work in person,2,"Pour over, Espresso, Cold brew",Somewhat light,Full caffeine,"It tastes good, I need the ritual",Pourover
3584,25,I primarily work from home,1,"Pour over, French press, Pod/capsule machine (...",Somewhat strong,Full caffeine,It tastes good,Pourover


In [36]:
wfh_nona_df['Age'] = pd.to_numeric(wfh_nona_df['Age'], errors='raise')

wfh_nona_df = wfh_nona_df.dropna()

display(wfh_nona_df.sample(10))
display(wfh_nona_df.info())

Unnamed: 0,Age,WFH,Cups Per Day,Brew Methods,Strength?,How Much Caffeine?,Why,Favorite Form
3931,45,I primarily work in person,5,"Pour over, Coffee brewing machine (e.g. Mr. Co...",Somewhat strong,Full caffeine,It tastes good,Americano
2266,25,I do a mix of both,2,Pour over,Somewhat strong,Full caffeine,"It tastes good, I need the caffeine",Cortado
2786,25,I primarily work from home,1,Espresso,Somewhat strong,Decaf,It tastes good,Latte
3016,25,I primarily work in person,2,"Coffee brewing machine (e.g. Mr. Coffee), Othe...",Somewhat strong,Full caffeine,"It tastes good, I need the caffeine, I need th...",Regular drip coffee
1389,35,I do a mix of both,2,"Pour over, French press, Espresso",Very strong,Full caffeine,"It tastes good, I need the caffeine",Cappuccino
448,25,I primarily work from home,3,"Pour over, French press, Coffee brewing machin...",Very strong,Full caffeine,"It tastes good, I need the caffeine, I need th...",Pourover
2894,25,I do a mix of both,0,"French press, Coffee brewing machine (e.g. Mr....",Somewhat strong,Half caff,"It tastes good, I need the caffeine, I need th...",Regular drip coffee
1784,25,I primarily work from home,1,Pour over,Medium,Full caffeine,"I need the ritual, It tastes good, I need the ...",Pourover
3719,35,I do a mix of both,2,French press,Very strong,Full caffeine,"It tastes good, I need the caffeine",Pourover
1058,18,I do a mix of both,2,"Pour over, Espresso",Somewhat strong,Full caffeine,It tastes good,Cappuccino


<class 'pandas.core.frame.DataFrame'>
Int64Index: 3207 entries, 34 to 4041
Data columns (total 8 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   Age                 3207 non-null   int64 
 1   WFH                 3207 non-null   object
 2   Cups Per Day        3207 non-null   int64 
 3   Brew Methods        3207 non-null   object
 4   Strength?           3207 non-null   object
 5   How Much Caffeine?  3207 non-null   object
 6   Why                 3207 non-null   object
 7   Favorite Form       3207 non-null   object
dtypes: int64(2), object(6)
memory usage: 225.5+ KB


None