In [None]:
import pandas as pd
from sklearn.preprocessing import normalize
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.cluster import AgglomerativeClustering
import matplotlib.pyplot as plt
from pathlib import Path

In [None]:
file = Path('Resources/survey_results.xlsx')

In [None]:
results_df = pd.read_excel(file)
results_df.head()

In [None]:
#RENAME COLUMNS
survey_df = results_df.rename(columns={
    "ID":"id",
    "Start time": "start_time",
    "Completion time": "end_time",
    "Email": "email",
    "Name": "name",	
    "What is your age range?": "age",
    "Gender":"gender",	
    "Race/Ethnicity (Select all that apply)": "race",	
    "Which US Geographic Region do you live in?": "region",	
    "What is the name of your favorite TV Show?\n": "tv_show",
    "How often do you eat vegetables?": "frequency",
    "Types of veggies you most like to eat (pick your top 5!)": "to_eat",
    'What is your "go to" choice for veggies at home? (Pick your top three)': "at_home",
    'What is your "go to" choice for veggies at a restaurant (top three)': "at_restaurant",	
    "Preferred ways for cooking/preparing the choices you selected above": "cook_prep",
    "What alternative to vegetables do you use/eat to stay healthy?": "alternatives",
    "What are your top three favorite types of fruits you like to eat? (example: apples, bananas, oranges)": "fruits"})

In [None]:
#SPLIT COLUMNS WITH MULTIPLE SELECTIONS

to_eat_test = survey_df["to_eat"].str.split(';', expand=True)
drop_list_five = [0,1,2,3,4]
drop_list_three = [0,1,2]

top_to_eat = to_eat_test.drop(to_eat_test.columns.difference(drop_list_five), axis=1)


top_to_eat = top_to_eat.rename(columns={0:"to_eat_1", 1: "to_eat_2", 2: "to_eat_3", 3: "to_eat_4", 4: "to_eat_5"})
top_to_eat
# top_five = to_eat_test[[0,1,2,3,4]]
# top_five


In [None]:
survey_df['to_eat_'{i+1}'] = top_to_eat[i]
survey_df

In [69]:
survey_df["tv_show"] = survey_df["tv_show"].str.upper()
survey_df

Unnamed: 0,id,start_time,end_time,email,name,age,gender,race,region,tv_show,frequency,to_eat,at_home,at_restaurant,cook_prep,alternatives,fruits,to_eat_1
0,1,2022-06-04 15:01:02,2022-06-04 15:05:26,anonymous,,40-49,Female,"Caucasian, non-Hispanic;",Midwest,FIREFLY,1/week,Asparagus ;Corn;Carrots;Spinach;Broccoli;,Lettuce (any kind);Carrots;Spinach;,Asparagus;Corn;Broccoli;,Grilled;Sauteed;Baked;,,"Cherries, blueberries, pineapple",Asparagus
1,2,2022-06-06 19:20:23,2022-06-06 19:22:18,anonymous,,20-29,Female,Asian;,Midwest,VINCENZO,4-5/week,Broccoli;Cauliflower;Corn;Green Beans;Peas;Spi...,Broccoli;Corn;Peas;,Broccoli;Cauliflower;Corn;,Fried/Air-fried;Grilled;Sauteed;Microwaved;,,Mangoes cherries oranges,Broccoli
2,3,2022-06-06 19:19:21,2022-06-06 19:22:26,anonymous,,40-49,Female,"Caucasian, non-Hispanic;",Midwest,TED LASSO,Daily 6-7 week,Broccoli;Carrots;Green Beans;Spinach;Squash;,Broccoli;Carrots;Squash;,Lettuce (any kind);Broccoli;Spinach;,Steamed;Baked;,,"bananas, strawberries, mangos",Broccoli
3,4,2022-06-06 19:20:24,2022-06-06 19:25:18,anonymous,,20-29,Female,Black or African American;,Midwest,SUCCESSION,2-3/week,Broccoli;Carrots;Corn;Green Beans;Cauliflower;,Broccoli;Carrots;Cauliflower;,Broccoli;Carrots;Green Beans;,Baked;Fried/Air-fried;Steamed;,,"Bananas, strawberries, oranges",Broccoli
4,5,2022-06-06 19:28:42,2022-06-06 19:35:54,anonymous,,30-39,Female,"Caucasian, non-Hispanic;",Midwest,CRITICAL ROLE,2-3/week,Carrots;Spinach;Squash;Corn;Cauliflower;,Spinach;Carrots;Broccoli;,Spinach;Carrots;Squash;,Baked;Sauteed;,,"Peaches, Oranges, Mangoes",Carrots
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
156,157,2022-06-07 15:11:42,2022-06-07 15:13:43,anonymous,,20-29,Female,Asian;,Midwest,90 DAY FIANCE,Daily 6-7 week,Lettuce (any kind);Spinach;Corn;asparagus;Carr...,Lettuce (any kind);Spinach;Carrots;,brussel sprouts;Spinach;Corn;,Fried/Air-fried;Sauteed;,,"blueberries, strawberries, grapes",Lettuce (any kind)
157,158,2022-06-07 15:47:11,2022-06-07 15:51:49,anonymous,,30-39,Female,"Caucasian, non-Hispanic;",Midwest,GILMORE GIRLS,Daily 6-7 week,Spinach;Lettuce (any kind);Carrots;Broccoli;Sq...,Broccoli;Spinach;Lettuce (any kind);,Squash;Spinach;Cauliflower;,Baked;Sauteed;Steamed;,,"Bananas, apples, grapes",Spinach
158,159,2022-06-07 15:59:49,2022-06-07 16:03:12,anonymous,,40-49,Female,"Caucasian, non-Hispanic;",Midwest,CHICAGO FIRE,Daily 6-7 week,Lettuce (any kind);Corn;Cauliflower;Carrots;Cu...,Peas;Broccoli;Cauliflower;,Asparagus;,Fried/Air-fried;Grilled;Steamed;,,"Bananas, grapes, apples",Lettuce (any kind)
159,160,2022-06-07 16:37:15,2022-06-07 16:39:20,anonymous,,30-39,Female,"Caucasian, non-Hispanic;",Midwest,BONES,2-3/week,Carrots;Corn;Peas;Spinach;Green Beans;,Carrots;Corn;Peas;,Carrots;Green Beans;Spinach;,Steamed;,,"Apples, oranges, pineapple",Carrots


In [70]:
#Dependencies for API
import requests
import json

from api_key import movie_api

In [81]:
shows = survey_df["tv_show"].tolist()
print(shows)

['FIREFLY', 'VINCENZO', 'TED LASSO', 'SUCCESSION', 'CRITICAL ROLE', 'GOSSIP GIRL', 'DEMON SLAYER', 'WEST WING', 'FRIENDS', 'NCIS', 'NCIS', 'THE OFFICE', 'TOP CHEF', 'OUTLANDER', 'NEW GIRL', 'TWIN PEAKS', 'STRANGER THINGS', 'NONE', 'CHICAGO MED', 'NEW GIRL', 'BATTLEBOTS', 'BREAKING BAD', 'FAMILY FEUD ', 'CHICAGO MED, FIRE, PD', 'THE OFFICE', 'RICK AND MORTY', "HELL'S KITCHEN ", 'BOB’S BURGERS', 'THIS IS US', 'SPORTS', 'DANCING WITH THE STARS', 'HOW I MET YOUR MOTHER', 'GREY’S ANATOMY ', 'OUR FLAG MEANS DEATH', 'CHICAGO SERIES', 'FRIENDS', 'HANNIBAL', 'STRANGER THINGS', 'NOVA', 'CHICAGO FIRE', 'THE BOYS', 'THE OFFICE', "SCHITT'S CREEK", 'STAR TREK ', 'THE OFFICE', 'MONDAY NIGHT FOOTBALL ', 'CHICAGO PD', 'BLACKLIST', 'STATION ELEVEN', 'THE FIRST LADY', 'JEOPARDY ', 'HGTV SHOWS', 'WALKING DEAD', 'YOU', 'ALL AMERICAN ', 'COSBY SHOW', 'STRANGER THINGS', 'THE L WORD', 'THE WIRE', 'FIREFLY', "I DON'T HAVE ONE", 'THE SIMPSONS', 'SUPERNATURAL', 'FRIENDS ', 'JANE THE VIRGIN ', 'DATELINE', 'STRANG

In [96]:
len(broken)

48

In [94]:
movie_url = "https://api.themoviedb.org/3/search/movie?api_key="+movie_api

ids = []
titles = []
genre_id = []
test = []

for movie in broken:
    search = requests.get(movie_url + "&query="+movie).json()
    # search = requests.get(movie_url + "&query= CRITICAL ROLE").json()
    try:
        movie_id = search['results'][0]['id']
        titles.append(movie)
        print(search['results'][0])
        # cast_url = "https://api.themoviedb.org/3/movie/"+str(movie_id)+"/credits?api_key="+movie_api
        # response = requests.get(cast_url).json()
    
        # char_count.append(len(response['cast']))
    
        #Pulls character list for each movie and adds the movie id into the 'ids' list
        # for i in range(0,(len(response['cast']))):

        #     chars.append(response['cast'][i]['character'])
        #     ids.append(movie_id)
        #     titles.append(movie)
        # print(movie)
        
            
    except:
        print(f"'{movie}' failed again")

'CRITICAL ROLE' failed again
'NCIS' failed again
'NCIS' failed again
'TOP CHEF' failed again
'CHICAGO MED' failed again
'BATTLEBOTS' failed again
'CHICAGO MED, FIRE, PD' failed again
'HOW I MET YOUR MOTHER' failed again
'GREY’S ANATOMY ' failed again
'OUR FLAG MEANS DEATH' failed again
'MONDAY NIGHT FOOTBALL ' failed again
'CHICAGO PD' failed again
'STATION ELEVEN' failed again
'HGTV SHOWS' failed again
'I DON'T HAVE ONE' failed again
'JANE THE VIRGIN ' failed again
'THE SAPRANOS' failed again
'I DON'T HAVE ONE ' failed again
'CALL THE MIDWIFE' failed again
'GREYS ANATOMY' failed again
'SPY X FAMILY' failed again
'OWL HOUSE' failed again
'EVERYONE LOVES RAYMOND' failed again
'NCIS' failed again
'MILWAUKEE BREWERS PREGAME' failed again
'CARIBBEAN LIFE' failed again
'NO CURRENT FAVORITE' failed again
'THE CBS MORNING SHOW' failed again
'GREY'S ANATOMY' failed again
'GREY’S ANATOMY ' failed again
'NCIS' failed again
'GREYS ANATOMY' failed again
'RIGHT NOW STANGERS THINGS' failed again
'OU