In [1]:
import re

import pandas as pd
# Do not truncate long text cells (questions/answers) when frames are displayed.
pd.options.display.max_colwidth = None

In [2]:
# Read the Jeopardy! question set from the CSV file in the working directory,
# then preview its first five rows.
jeopardy = pd.read_csv('jeopardy.csv')

jeopardy.head(n=5)

Unnamed: 0,Show Number,Air Date,Round,Category,Value,Question,Answer
0,4680,2004-12-31,Jeopardy!,HISTORY,$200,"For the last 8 years of his life, Galileo was under house arrest for espousing this man's theory",Copernicus
1,4680,2004-12-31,Jeopardy!,ESPN's TOP 10 ALL-TIME ATHLETES,$200,"No. 2: 1912 Olympian; football star at Carlisle Indian School; 6 MLB seasons with the Reds, Giants & Braves",Jim Thorpe
2,4680,2004-12-31,Jeopardy!,EVERYBODY TALKS ABOUT IT...,$200,"The city of Yuma in this state has a record average of 4,055 hours of sunshine each year",Arizona
3,4680,2004-12-31,Jeopardy!,THE COMPANY LINE,$200,"In 1963, live on ""The Art Linkletter Show"", this company served its billionth burger",McDonald's
4,4680,2004-12-31,Jeopardy!,EPITAPHS & TRIBUTES,$200,"Signer of the Dec. of Indep., framer of the Constitution of Mass., second President of the United States",John Adams


In [3]:
# Print the column labels exactly as they were parsed, so stray whitespace in the names is visible.
print(jeopardy.columns)

Index(['Show Number', ' Air Date', ' Round', ' Category', ' Value',
       ' Question', ' Answer'],
      dtype='object')


In [4]:
# All but the first column name carry a leading space; strip the surrounding whitespace.
# The cleaned names are derived from the actual labels instead of a hard-coded list, so the
# rename cannot mislabel columns if the file's column order or count ever differs, and
# re-running the cell is harmless.
new_col=[name.strip() for name in jeopardy.columns]
jeopardy.columns=new_col
print(jeopardy.columns)

Index(['Show Number', 'Air Date', 'Round', 'Category', 'Value', 'Question',
       'Answer'],
      dtype='object')


In [5]:
# Summarise each column's dtype and non-null count to spot missing data and columns needing conversion.
jeopardy.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 216930 entries, 0 to 216929
Data columns (total 7 columns):
 #   Column       Non-Null Count   Dtype 
---  ------       --------------   ----- 
 0   Show Number  216930 non-null  int64 
 1   Air Date     216930 non-null  object
 2   Round        216930 non-null  object
 3   Category     216930 non-null  object
 4   Value        216930 non-null  object
 5   Question     216930 non-null  object
 6   Answer       216928 non-null  object
dtypes: int64(1), object(6)
memory usage: 11.6+ MB


In [6]:
#create a function that filters the dataset based on questions that contain specific words
def filter_question(data, wordlist, whole_word=False):
    """Return the rows of ``data`` whose 'Question' text contains every word in ``wordlist``.

    Matching is case-insensitive. By default a word matches anywhere inside the
    question text (substring match), so 'King' also matches 'Viking' or 'kingdom'.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a 'Question' column.
    wordlist : iterable of str
        Words that must all be present in a question for its row to be kept.
    whole_word : bool, default False
        When True, a word only matches if it is not directly preceded or
        followed by a letter, digit or underscore (so 'King' no longer
        matches 'Viking'). The default keeps the substring behaviour.

    Returns
    -------
    pandas.DataFrame
        The matching rows of ``data`` with their original index labels.
        Rows whose question is missing or not a string never match.
    """
    lowered_words = [word.lower() for word in wordlist]
    if whole_word:
        # Lookarounds rather than \b so that words starting or ending with
        # punctuation (e.g. "u.s.") can still be matched as a unit.
        patterns = [
            re.compile(r'(?<!\w)' + re.escape(word) + r'(?!\w)')
            for word in lowered_words
        ]

    def contains_all(text):
        # Missing questions (NaN/None) have no .lower(); treat them as
        # non-matching instead of raising AttributeError.
        if not isinstance(text, str):
            return False
        lowered_text = text.lower()
        if whole_word:
            return all(pattern.search(lowered_text) for pattern in patterns)
        return all(word in lowered_text for word in lowered_words)

    # astype(bool) keeps the mask boolean even when ``data`` has no rows.
    mask = data['Question'].apply(contains_all).astype(bool)
    return data.loc[mask]

In [7]:
# Sanity-check the filter: keep only questions containing both 'king' and 'england' (case-insensitive).
words=['King', 'England']
filter_question(jeopardy,words) #152 matches as expected; matching is by substring, so e.g. 'Viking' also satisfies 'King'

Unnamed: 0,Show Number,Air Date,Round,Category,Value,Question,Answer
4953,3003,1997-09-24,Double Jeopardy!,"""PH""UN WORDS",$200,"Both England's King George V & FDR put their stamp of approval on this ""King of Hobbies""",Philately (stamp collecting)
6337,3517,1999-12-14,Double Jeopardy!,Y1K,$800,"In retaliation for Viking raids, this ""Unready"" king of England attacks Norse areas of the Isle of Man",Ethelred
9191,3907,2001-09-04,Double Jeopardy!,WON THE BATTLE,$800,This king of England beat the odds to trounce the French in the 1415 Battle of Agincourt,Henry V
11710,2903,1997-03-26,Double Jeopardy!,BRITISH MONARCHS,$600,"This Scotsman, the first Stuart king of England, was called ""The Wisest Fool in Christendom""",James I
13454,4726,2005-03-07,Jeopardy!,A NUMBER FROM 1 TO 10,$1000,It's the number that followed the last king of England named William,4
...,...,...,...,...,...,...,...
208295,4621,2004-10-11,Jeopardy!,THE VIKINGS,$600,In 1066 this great-great grandson of Rollo made what some call the last Viking invasion of England,William the Conqueror
208742,4863,2005-11-02,Double Jeopardy!,BEFORE & AFTER,"$3,000",Dutch-born king who ruled England jointly with Mary II & is a tasty New Zealand fish,William of Orange roughy
213870,5856,2010-02-15,Double Jeopardy!,URANUS,$1600,In 1781 William Herschel discovered Uranus & initially named it after this king of England,George III
216021,1881,1992-11-09,Double Jeopardy!,HISTORIC NAMES,$1000,"His nickname was ""Bertie"", but he used this name & number when he became king of England in 1901",Edward VII


In [8]:
# Convert the Value column from strings such as '$2,000' to floats.
def parse_value(raw):
    """Convert one raw Value entry to a float dollar amount.

    Strings have '$', ',' and surrounding whitespace removed before being
    parsed; the literal 'None' (a question without a dollar value) becomes 0.
    Entries that are not strings are handled too: a missing value (NaN/None)
    becomes 0 and a number is returned as a float. That keeps the cell working
    when it is re-run on an already converted column, and on pandas releases
    whose read_csv parses the literal 'None' as a missing value.
    """
    if not isinstance(raw, str):
        return 0.0 if pd.isna(raw) else float(raw)
    cleaned = raw.replace('$', '').replace(',', '').strip()
    # NOTE: counting value-less questions as 0 lowers any mean computed over them.
    return float(cleaned) if cleaned != 'None' else 0.0

jeopardy['Value'] = jeopardy['Value'].apply(parse_value)

In [9]:
# Preview the first rows to confirm that Value now holds floats (e.g. 200.0) instead of strings like '$200'.
jeopardy.head()

Unnamed: 0,Show Number,Air Date,Round,Category,Value,Question,Answer
0,4680,2004-12-31,Jeopardy!,HISTORY,200.0,"For the last 8 years of his life, Galileo was under house arrest for espousing this man's theory",Copernicus
1,4680,2004-12-31,Jeopardy!,ESPN's TOP 10 ALL-TIME ATHLETES,200.0,"No. 2: 1912 Olympian; football star at Carlisle Indian School; 6 MLB seasons with the Reds, Giants & Braves",Jim Thorpe
2,4680,2004-12-31,Jeopardy!,EVERYBODY TALKS ABOUT IT...,200.0,"The city of Yuma in this state has a record average of 4,055 hours of sunshine each year",Arizona
3,4680,2004-12-31,Jeopardy!,THE COMPANY LINE,200.0,"In 1963, live on ""The Art Linkletter Show"", this company served its billionth burger",McDonald's
4,4680,2004-12-31,Jeopardy!,EPITAPHS & TRIBUTES,200.0,"Signer of the Dec. of Indep., framer of the Constitution of Mass., second President of the United States",John Adams


In [10]:
# Average dollar value of the questions whose text contains 'king' (roughly $772).
words = ['King']
king_questions = filter_question(jeopardy, words)
king_mean_value = king_questions['Value'].mean()
print(king_mean_value)

771.8833850722094


In [11]:
# function that returns the count of the unique answers to all of the questions in a subset after word filter is applied
def answer_count(data,wordlist):
    """Count how often each answer occurs among the questions matching ``wordlist``.

    The rows are selected with ``filter_question(data, wordlist)``; the
    'Answer' column of that selection is then tallied. Missing answers are
    counted as their own entry (``dropna=False``).

    Returns a pandas Series indexed by answer, holding the occurrence counts.
    """
    matching_answers = filter_question(data, wordlist)['Answer']
    return matching_answers.value_counts(dropna=False)

In [12]:
# Tally the answers to every question whose text contains 'king' (case-insensitive substring match).
answer_count(jeopardy,['King'])

Henry VIII                            55
Solomon                               35
Richard III                           33
Louis XIV                             31
David                                 30
                                      ..
"The Thin Man"                         1
porcelain (or pottery or ceramics)     1
Lady Bird Johnson                      1
Rick James Fenimore Cooper             1
Thomas Alva Edison                     1
Name: Answer, Length: 5268, dtype: int64