In [1]:
import pandas as pd  # dataframe library

filename = 'csv/kdrama_data.csv'
# filename = r"C:\Users\John Kim\Desktop\kdrama_data.csv"

df = pd.read_csv(filename)
df = df[['title', 'description', 'keywords',
         'genres', 'actors', 'director', 'screenwriter']]


def remove_chars(string):
    if isinstance(string, str) == False:
        return string
    remove_list = ["[", "]", "'"]
    for remove in remove_list:
        string = string.replace(remove, "")
    return string


def get_info(title):
    df = pd.read_csv(filename)
    fill_na()
    index = search_kdrama(title)

    row = df.loc[index]
    dicti = row.to_dict()
    # removes unnecessary characters
    columns = ['keywords', 'genres', 'actors']
    for column in columns:
        dicti[column] = remove_chars(dicti[column])
    return dicti


def get_desc_word_count(desc):
    word_list = desc.split(' ')
    word_count = len(word_list)
    return word_count


def get_titles():
    np_titles = df['title'].to_numpy()
    title_list = np_titles.tolist()
    return title_list


def kdrama_exists(title, klist):
    if title is None:
        return False
    for kdrama in klist:
        if title.lower() == kdrama.lower():
            return True
    return False


def fill_na():
    """replaces na values with an empty string"""
    df.replace("N/A", "")
    for label in df.columns:
        df[label] = df[label].fillna('')  # fills N/A values with ""


def get_indices():
    indices = pd.Series(df.index, index=df['title'])
    return indices[~indices.index.duplicated(keep='last')]


def search_kdrama(kdrama_name):
    """searches for kdrama with matching name and returns top result"""
    # return get_indices()[get_indices().index.str.contains(kdrama_name, regex=False, na=False)][0]
    return get_indices()[get_indices().index.str.contains(kdrama_name.lower(), case=False, regex=False, na=False)][0]


In [2]:
df

Unnamed: 0,title,description,keywords,genres,actors,director,screenwriter
0,Move to Heaven,Han Geu Roo is a 20-year-old with Autism. He w...,"['Uncle-Nephew Relationship', 'Autism', 'Death...","['Life', 'Drama', 'Family']","['Lee Je Hoon', 'Tang Jun Sang', 'Hong Seung H...",Kim Sung Ho,Yoon Ji Ryun
1,Hospital Playlist,The stories of people going through their days...,"['Multiple Mains', 'Best Friends', 'Slow Roman...","['Friendship', 'Romance', 'Life', 'Medical']","['Jo Jung Suk', 'Yoo Yeon Seok', 'Jung Kyung H...",Shin Won Ho,Lee Woo Jung
2,Flower of Evil,Although Baek Hee Sung is hiding a dark secret...,"['Married Couple', 'Deception', 'Family Secret...","['Thriller', 'Romance', 'Crime', 'Melodrama']","['Lee Joon Gi', 'Moon Chae Won', 'Jang Hee Jin...","Kim Chul Gyu, Yoon Jong Ho",Yoo Jung Hee
3,Hospital Playlist 2,Everyday is extraordinary for five doctors and...,"['Workplace', 'Strong Friendship', 'Best Frien...","['Friendship', 'Romance', 'Life', 'Medical']","['Jo Jung Suk', 'Yoo Yeon Seok', 'Jung Kyung H...",Shin Won Ho,Lee Woo Jung
4,My Mister,Park Dong Hoon is a middle-aged engineer who i...,"['Nice Male Lead', 'Strong Female Lead', 'Smar...","['Psychological', 'Life', 'Drama', 'Family']","['Lee Sun Kyun', 'IU', 'Park Ho San', 'Song Sa...","Kim Won Suk, Kim Sang Woo",Park Hae Young
...,...,...,...,...,...,...,...
3027,Never Give Up,"Depicts the bromance of Goo Pil Soo, a breadwi...","['Businessman Male Lead', 'Entrepreneur Male L...","['Comedy', 'Life', 'Drama']","['Yoon Doo Joon', 'Kwak Do Won', 'Han Go Eun',...",Choi Do Hoon,Son Geun Joo
3028,Doctor Lawyer,Han Yi Han was an elite surgeon. He graduated...,"['Thoracic Surgeon Male Lead', 'Prosecutor Fem...","['Thriller', 'Law', 'Drama', 'Medical']","['So Ji Sub', 'Shin Sung Rok', 'Im Soo Hyang',...",Lee Yong Seok,Jang Hong Chul
3029,My Rocket Ship,"Gong Ji Hoon, CEO of an AI art collection star...","['Startup', 'Entrepreneur Male Lead', 'Short L...","['Business', 'Life', 'Youth', 'Drama']","['Kim Ji Hoon', 'Moon Kang Hyuk', 'Oh Se Young...",,
3030,Gold Mask,"A tragedy occurs for three women, caused by gr...","['Married Female Lead', 'Loan Shark Female Lea...","['Drama', 'Family', 'Melodrama']","['Cha Ye Ryun', 'Lee Hyun Jin', 'Na Young Hee'...",Uh Soo Sun,Kim Min Joo


In [24]:
from collections import Counter
import plotly.express as px
import numpy as np

df_net = pd.read_csv('/Users/johnkim/Desktop/netflix_titles.csv')

In [25]:


# dealing with missing values by replacement
df_net['country'] = df_net['country'].fillna(df_net['country'].mode()[0])
df_net['cast'].replace(np.nan, 'No Data', inplace  = True)
df_net['director'].replace(np.nan, 'No Data', inplace  = True)

# drop NA
df_net.dropna(how = 'any', inplace=True)

# drop Duplicates
df_net.drop_duplicates(inplace= True)

df_tv=df[df['type']=='TV Show']


In [28]:


#tv show
vc1 = df_tv['country'].value_counts().reset_index()
vc1 = vc1.rename(columns = {'index':'country', 'country':'count'})

country_tv=list(df_tv['country'])
print(country_tv)


cou_tv=[]
for i in country_tv:
    i = str(i)
    if i == 'nan': continue
    k = list(i.split(', '))
    for j in k:
        cou_tv.append(j)

g = Counter(cou_tv)
counts = pd.DataFrame(g.items(), columns=['label', 'counts'])

fig1 = px.choropleth(counts,
              locations='label',
              locationmode='country names',
              color='counts',
              projection='kavrayskiy7',
              scope='world',
              title='TV Show Content from Different Countries',
              color_continuous_scale=px.colors.sequential.Sunsetdark)

fig1.show()

['South Africa', 'United States', 'United States', 'India', 'United States', 'United Kingdom', 'United States', 'United States', 'United States', 'United States', 'Mexico', 'United States', 'United States', 'Turkey', 'Australia', 'United States', 'United Kingdom', 'United States', 'United States', 'Finland', 'India', 'United States', 'United States', 'Nigeria', 'India', 'United States', 'United Kingdom', 'India', 'United States', 'India', 'United States', 'United Kingdom', 'United States', 'Japan', 'United States', 'United States', 'United States', 'United States', 'United States', 'Belgium', 'United States, United Kingdom', 'United States', 'United Kingdom', 'France, United States', 'United States', 'United States', 'South Korea', 'United States', 'Australia', 'Spain', 'United States', 'United States, Singapore', 'Australia', 'United States', 'United States', 'South Korea', 'United States', 'United States', 'United States', 'United States', 'Russia', 'Japan', 'United States', 'United 

In [29]:
import plotly.io as pio

fh = open('data_viz_embed.html', 'w') # this just creates a blank file, so that the write_html() function can write to this file later
fh.close()

pio.write_html(fig1, file='data_viz_embed_just_div.html', auto_open=True, full_html=False) # preferred method