In [27]:
import requests
import pandas as pd

# Source table for the scrape; its 'name' column supplies the titles looked up later
data = pd.read_csv(filepath_or_buffer="final_data.csv")

def search_show(show_name):
    """Search TVmaze for a show by title and return the first match.

    Args:
        show_name: Free-text show title to search for.

    Returns:
        The ``'show'`` object of the first search result, or ``None`` when the
        response status is not 200 or the result list is empty.

    Raises:
        requests.exceptions.RequestException: if the request cannot be
            completed (including exceeding the timeout).
    """
    # Hand the title to `params` so requests percent-encodes it. Pasting it
    # into the URL by hand lets characters such as '&' or '#' in a title cut
    # the query short or split it into extra parameters.
    response = requests.get(
        "https://api.tvmaze.com/search/shows",
        params={"q": show_name},
        timeout=10,  # seconds; without it a stalled connection blocks forever
    )
    if response.status_code != 200:
        return None

    search_results = response.json()
    if not search_results:
        return None

    # Results are taken in the order the API returns them; keep only the first.
    return search_results[0]['show']

def get_show_details(show_id):
    """Fetch the main TVmaze record for a show.

    Args:
        show_id: TVmaze show identifier, interpolated into the request URL.

    Returns:
        The parsed JSON response, or ``None`` when the response status is
        not 200.

    Raises:
        requests.exceptions.RequestException: if the request cannot be
            completed (including exceeding the timeout).
    """
    details_url = f"https://api.tvmaze.com/shows/{show_id}"
    # An explicit timeout keeps one stalled connection from hanging a long
    # batch of lookups; requests waits indefinitely when none is given.
    response = requests.get(details_url, timeout=10)
    if response.status_code != 200:
        return None
    return response.json()

def get_cast(show_id):
    """Fetch the cast listing for a show from TVmaze.

    Args:
        show_id: TVmaze show identifier, interpolated into the request URL.

    Returns:
        The parsed JSON response, or ``None`` when the response status is
        not 200.

    Raises:
        requests.exceptions.RequestException: if the request cannot be
            completed (including exceeding the timeout).
    """
    cast_url = f"https://api.tvmaze.com/shows/{show_id}/cast"
    # An explicit timeout keeps one stalled connection from hanging a long
    # batch of lookups; requests waits indefinitely when none is given.
    response = requests.get(cast_url, timeout=10)
    if response.status_code != 200:
        return None
    return response.json()

def get_crew(show_id):
    """Fetch the crew listing for a show from TVmaze.

    Args:
        show_id: TVmaze show identifier, interpolated into the request URL.

    Returns:
        The parsed JSON response, or ``None`` when the response status is
        not 200.

    Raises:
        requests.exceptions.RequestException: if the request cannot be
            completed (including exceeding the timeout).
    """
    crew_url = f"https://api.tvmaze.com/shows/{show_id}/crew"
    # An explicit timeout keeps one stalled connection from hanging a long
    # batch of lookups; requests waits indefinitely when none is given.
    response = requests.get(crew_url, timeout=10)
    if response.status_code != 200:
        return None
    return response.json()

def extract_show_data(show_name):
    """Build one flat record of TVmaze metadata for a show title.

    Searches for the title, then fetches details, cast and crew for the first
    match. Every field other than ``'Name'`` stays ``None`` when the search
    finds nothing or the details lookup fails.

    Args:
        show_name: Show title to look up; stored unchanged under ``'Name'``.

    Returns:
        A dict with keys ``'Name'``, ``'Genres'``, ``'Status'``, ``'Runtime'``,
        ``'Premiered'``, ``'Creators'``, ``'Cast'`` and ``'Crew'``. List-like
        fields are flattened to comma-separated strings.
    """
    show_data = {
        'Name': show_name,
        'Genres': None,
        'Status': None,
        'Runtime': None,
        'Premiered': None,
        'Creators': None,
        'Cast': None,
        'Crew': None
    }

    search_result = search_show(show_name)
    if not search_result:
        return show_data

    show_id = search_result['id']
    show_details = get_show_details(show_id)
    show_cast = get_cast(show_id)
    show_crew = get_crew(show_id)

    if not show_details:
        return show_data

    show_data['Genres'] = ', '.join(show_details['genres'])
    show_data['Status'] = show_details['status']
    show_data['Runtime'] = show_details['runtime']
    show_data['Premiered'] = show_details['premiered']

    # get_crew() returns None when its request fails, so fall back to an empty
    # list here; iterating None would raise TypeError and abort the caller.
    creators = [
        member['person']['name']
        for member in (show_crew or [])
        if member['type'] == 'Creator'
    ]
    show_data['Creators'] = ', '.join(creators)

    # Cast entries are rendered as "<person> as <character>".
    if show_cast:
        cast_list = [
            f"{cast_member['person']['name']} as {cast_member['character']['name']}"
            for cast_member in show_cast
        ]
        show_data['Cast'] = ', '.join(cast_list)

    # Crew entries are rendered as "<person> - <role type>".
    if show_crew:
        crew_list = [
            f"{crew_member['person']['name']} - {crew_member['type']}"
            for crew_member in show_crew
        ]
        show_data['Crew'] = ', '.join(crew_list)

    return show_data


# Titles to look up, taken from the 'name' column of the input table
show_names = data['name']

# One lookup per title, collected as a list of row dicts
shows_data = [extract_show_data(title) for title in show_names]

# Build the frame once from all collected rows
df = pd.DataFrame(shows_data)

# Display the result as the cell output
df

Unnamed: 0,Name,Genres,Status,Runtime,Premiered,Creators,Cast,Crew
0,gud se meetha ishq,,,,,,,
1,kemono jihen,"Action, Anime, Fantasy, Mystery",Ended,30.0,2021-01-10,,,
2,fastest cars in the dirty south,,Running,,2019-07-12,,"Eric Malone as Eric Malone, Brant Arnold as Br...",
3,hutatma,,,,,,,
4,the great escape (french),,,,,,,
...,...,...,...,...,...,...,...,...
3486,india on film,,,,,,,
3487,sankellu,,,,,,,
3488,mangalyam thanthunaane,,,,,,,
3489,el presidente: corruption game,"Drama, Comedy, Sports",To Be Determined,,2020-06-05,Pablo Larraín,"Andrés Parra as Sergio Jadue, Karla Souza as L...","Pablo Larraín - Creator, Pablo Larraín - Produ..."


In [29]:
# Save the scraped rows to disk; index=False leaves the row index out of the file
df.to_csv(path_or_buf='rest.csv', index=False)

In [31]:
import pandas as pd

# Per-chunk result files, listed in the order they are stacked
csv_files = ['0-1000.csv', '1000-3000.csv', '3000-8000.csv', 'rest.csv']

# Load each file into its own DataFrame
dataframes = list(map(pd.read_csv, csv_files))

# Stack the frames vertically and renumber the rows from zero
merged_df = pd.concat(dataframes, ignore_index=True)

# Write the combined table without the row index
merged_df.to_csv('merged_shows_data.csv', index=False)

# Report where the merged file was written, relative to the current working directory
import os
print(f"Merged file saved in: {os.path.join(os.getcwd(), 'merged_shows_data.csv')}")


Merged file saved in: /Users/girirajpurohit23/Downloads/Internship/merged_shows_data.csv


In [33]:
# Reload the merged results from disk
dt = pd.read_csv(filepath_or_buffer='merged_shows_data.csv')

In [39]:
# Print a per-column summary (non-null counts and dtypes) of the merged table
dt.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11492 entries, 0 to 11491
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Name       11492 non-null  object 
 1   Genres     3557 non-null   object 
 2   Status     4552 non-null   object 
 3   Runtime    3039 non-null   float64
 4   Premiered  4454 non-null   object 
 5   Creators   1118 non-null   object 
 6   Cast       2754 non-null   object 
 7   Crew       1511 non-null   object 
dtypes: float64(1), object(7)
memory usage: 718.4+ KB
