In [103]:


import pandas as pd
import requests
import time
import random
import json
from typing import Tuple, Optional
import urllib.parse


CSV_FILE_PATH = 'archive/korean_drama.csv'
OUTPUT_FILE_PATH = 'korean_drama_with_genres.csv'
DRAMA_TITLE_COLUMN = 'drama_name'  




BASE_DELAY = 3  
SAVE_FREQUENCY = 10  



class GenreFinder:
    
    
    def __init__(self, tmdb_key=None, omdb_key=None):
        self.session = requests.Session()
        self.tmdb_key = tmdb_key
        self.omdb_key = omdb_key
        self.stats = {'tmdb': 0, 'omdb': 0, 'tvmaze': 0, 'predefined': 0, 'failed': 0}
        
    def find_genre(self, drama_name: str) -> Tuple[str, str]:
        """
        Find genre using multiple sources
        Returns: (genre, source)
        """
        # Strategy 1: Check predefined list first (instant)
        genre, source = self._check_predefined(drama_name)
        if genre:
            self.stats['predefined'] += 1
            return genre, source
        
       
        
        # Strategy 3: Try TVMaze (no API key needed, very reliable)
        genre, source = self._search_tvmaze(drama_name)
        if genre:
            self.stats['tvmaze'] += 1
            return genre, source
        time.sleep(1)
        
        
        
        self.stats['failed'] += 1
        return "Not Found", "None"
    
    def _check_predefined(self, drama_name: str) -> Tuple[Optional[str], Optional[str]]:
        """Check against predefined genre list"""
        predefined = {
            # Add your known dramas here for instant lookup
            'd.p.': 'Action, Drama, Military',
            'dp': 'Action, Drama, Military',
            'celebrity': 'Drama, Thriller',
            'sing my crush': 'Romance, Music',
            'shadow detective': 'Crime, Mystery, Thriller',
            'to be honest': 'Drama, Romance',
            'blue temperature': 'Romance, Drama',
            'bloodhounds': 'Action, Crime, Drama',
            'destined with you': 'Romance, Fantasy',
            'good bad mother': 'Drama, Family',
            'king the land': 'Romance, Comedy',
            'move to heaven': 'Drama',
            'squid game': 'Action, Thriller, Drama',
            'crash landing on you': 'Romance, Comedy, Drama',
            'itaewon class': 'Drama',
            'vincenzo': 'Crime, Comedy, Drama',
            'hometown cha-cha-cha': 'Romance, Comedy',
            'extraordinary attorney woo': 'Drama, Comedy',
            'business proposal': 'Romance, Comedy',
            'alchemy of souls': 'Fantasy, Romance, Drama',
            'our blues': 'Drama, Romance',
            'twenty five twenty one': 'Drama, Romance',
            'all of us are dead': 'Horror, Thriller, Drama',
            'my name': 'Action, Crime, Thriller',
            'hellbound': 'Horror, Thriller, Fantasy',
            'the glory': 'Thriller, Drama',
            'sweet home': 'Horror, Thriller, Fantasy',
            'kingdom': 'Horror, Thriller, Historical',
        }
        
        drama_lower = drama_name.lower().strip()
        
        # Exact match
        if drama_lower in predefined:
            return predefined[drama_lower], "Predefined"
        
        # Partial match
        for key, genre in predefined.items():
            if key in drama_lower or drama_lower in key:
                return genre, "Predefined"
        
        return None, None
    
   
    
    def _search_tvmaze(self, drama_name: str) -> Tuple[Optional[str], Optional[str]]:
        """Search TVMaze API (no key needed)"""
        try:
            search_url = "https://api.tvmaze.com/search/shows"
            params = {'q': drama_name}
            
            response = self.session.get(search_url, params=params, timeout=10)
            if response.status_code != 200:
                return None, None
            
            data = response.json()
            if not data:
                return None, None
            
            # Get first result
            show = data[0].get('show', {})
            genres = show.get('genres', [])
            
            if genres:
                return ', '.join(genres), "TVMaze"
            
            return None, None
            
        except Exception as e:
            return None, None
    
   


def process_dramas():
    """Main processing function"""
    
    print("="*80)
    print("               KOREAN DRAMA GENRE SCRAPER v2.0")
    print("="*80)
    print()
    
    # Load CSV
    try:
        df = pd.read_csv(CSV_FILE_PATH)
        print(f"‚úì Loaded CSV: {len(df)} dramas found")
    except FileNotFoundError:
        print(f"‚úó ERROR: File '{CSV_FILE_PATH}' not found!")
        return
    except Exception as e:
        print(f"‚úó ERROR loading CSV: {e}")
        return
    
    # Check if drama column exists
    if DRAMA_TITLE_COLUMN not in df.columns:
        print(f"\n‚úó ERROR: Column '{DRAMA_TITLE_COLUMN}' not found!")
        print(f"Available columns: {', '.join(df.columns)}")
        return
    
    # Add genre columns if they don't exist
    if 'Genre' not in df.columns:
        df['Genre'] = None
    if 'Genre_Source' not in df.columns:
        df['Genre_Source'] = None
    
    # Create backup
    backup_path = CSV_FILE_PATH.replace('.csv', '_backup.csv')
    df.to_csv(backup_path, index=False)
    print(f"‚úì Backup created: {backup_path}")
    
    
    
    
    
    print("\n" + "="*80)
    print("Starting genre search...")
    print("="*80 + "\n")
    
    # Initialize finder
    finder = GenreFinder(tmdb_key=TMDB_API_KEY, omdb_key=OMDB_API_KEY)
    
    # Process each drama
    processed = 0
    skipped = 0
    found = 0
    not_found = 0
    
    for idx, row in df.iterrows():
        drama_name = str(row[DRAMA_TITLE_COLUMN]).strip()
        current_genre = row.get('Genre')
        
        # Skip if already has valid genre
        if pd.notna(current_genre) and current_genre not in ['Not Found', '', 'None']:
            print(f"[{idx+1}/{len(df)}] ‚è≠Ô∏è  Skipping: {drama_name} (already has genre)")
            skipped += 1
            continue
        
        print(f"\n[{idx+1}/{len(df)}] üîç Searching: {drama_name}")
        
        # Find genre
        genre, source = finder.find_genre(drama_name)
        
        # Update dataframe
        df.loc[idx, 'Genre'] = genre
        df.loc[idx, 'Genre_Source'] = source
        
        # Update stats
        processed += 1
        if genre != "Not Found":
            found += 1
            print(f"         ‚úÖ Found: {genre} [{source}]")
        else:
            not_found += 1
            print(f"         ‚ùå Not found")
        
        # Save progress periodically
        if processed % SAVE_FREQUENCY == 0:
            df.to_csv(OUTPUT_FILE_PATH, index=False, encoding='utf-8-sig')
            print(f"\n         üíæ Progress saved ({processed} processed)")
            print(f"         üìä Found: {found} | Not Found: {not_found} | Skipped: {skipped}")
        
        # Delay between requests
        if idx < len(df) - 1:
            delay = BASE_DELAY + random.uniform(1, 3)
            time.sleep(delay)
    
    # Final save
    df.to_csv(OUTPUT_FILE_PATH, index=False, encoding='utf-8-sig')
    
    # Print final report
    print("\n" + "="*80)
    print("                          FINAL REPORT")
    print("="*80)
    print(f"\nüìä Processing Summary:")
    print(f"   Total dramas:        {len(df)}")
    print(f"   Processed:           {processed}")
    print(f"   Skipped:             {skipped}")
    print(f"   Genres found:        {found} ({found/(processed or 1)*100:.1f}%)")
    print(f"   Not found:           {not_found} ({not_found/(processed or 1)*100:.1f}%)")
    
    print(f"\nüì° Sources Used:")
    for source, count in finder.stats.items():
        if count > 0:
            emoji = "‚úì" if source != 'failed' else "‚úó"
            print(f"   {emoji} {source.upper()}: {count}")
    
    print(f"\nüíæ Output saved to: {OUTPUT_FILE_PATH}")
    
    # Show sample of not found dramas
    if not_found > 0:
        not_found_dramas = df[df['Genre'] == 'Not Found'][DRAMA_TITLE_COLUMN].head(10).tolist()
        print(f"\n‚ö†Ô∏è  Sample of dramas not found (first 10):")
        for drama in not_found_dramas:
            print(f"   ‚Ä¢ {drama}")
        print(f"\n   üí° Consider adding these to the predefined list")
    
    print("\n" + "="*80)
    print("‚úÖ Process completed successfully!")
    print("="*80)


if __name__ == "__main__":
    try:
        process_dramas()
    except KeyboardInterrupt:
        print("\n\n‚ö†Ô∏è  Process interrupted by user")
        print("Progress has been saved")
    except Exception as e:
        print(f"\n\n‚ùå Unexpected error: {e}")
        import traceback
        traceback.print_exc()

               KOREAN DRAMA GENRE SCRAPER v2.0

‚úì Loaded CSV: 1752 dramas found
‚úì Backup created: archive/korean_drama_backup.csv

Starting genre search...



‚ùå Unexpected error: name 'TMDB_API_KEY' is not defined


Traceback (most recent call last):
  File "/var/folders/dy/_625pkpx76v262_s6lrwmz680000gn/T/ipykernel_49137/1733049600.py", line 263, in <module>
    process_dramas()
  File "/var/folders/dy/_625pkpx76v262_s6lrwmz680000gn/T/ipykernel_49137/1733049600.py", line 179, in process_dramas
    finder = GenreFinder(tmdb_key=TMDB_API_KEY, omdb_key=OMDB_API_KEY)
                                  ^^^^^^^^^^^^
NameError: name 'TMDB_API_KEY' is not defined


In [104]:
"""
Genre Matcher - Fill Missing Genres from Reference CSV
This script matches dramas from your main CSV with a reference CSV that has genres,
and fills in the missing genres.
"""


from difflib import SequenceMatcher
import re

# ===================== CONFIGURATION =====================
MAIN_CSV = 'archive/korean_drama_with_genres.csv'  # Your main CSV (the one with missing genres)
REFERENCE_CSV = 'archive/top100_kdrama.csv'  # CSV with 100 dramas that have genres
MAIN_DRAMA_COLUMN = 'drama_name'  # Drama name column in main CSV
REFERENCE_DRAMA_COLUMN = 'Name'  # Drama name column in reference CSV
REFERENCE_GENRE_COLUMN = 'Genre'  # Genre column in reference CSV
OUTPUT_CSV = 'korean_drama_complete.csv'  # Output file

SIMILARITY_THRESHOLD = 0.75  # 85% similarity required for a match (0.0 to 1.0)
# =========================================================


def clean_drama_name(name):
    """Clean drama name for better matching"""
    if pd.isna(name):
        return ""
    
    name = str(name).lower().strip()
    
    # Remove common variations
    name = re.sub(r'\s*\(.*?\)\s*', '', name)  # Remove (year) or (country)
    name = re.sub(r'\s*season\s+\d+\s*', '', name, flags=re.IGNORECASE)  # Remove "Season 2"
    name = re.sub(r'\s*:\s*', ' ', name)  # Replace : with space
    name = re.sub(r'[^\w\s]', '', name)  # Remove special characters
    name = re.sub(r'\s+', ' ', name)  # Normalize spaces
    
    return name.strip()


def calculate_similarity(str1, str2):
    """Calculate similarity between two strings (0.0 to 1.0)"""
    return SequenceMatcher(None, str1, str2).ratio()


def find_best_match(drama_name, reference_df, reference_column):
    """
    Find the best matching drama in reference CSV
    Returns: (matched_drama_name, similarity_score, index)
    """
    cleaned_target = clean_drama_name(drama_name)
    
    if not cleaned_target:
        return None, 0, None
    
    best_match = None
    best_score = 0
    best_idx = None
    
    for idx, row in reference_df.iterrows():
        ref_name = row[reference_column]
        cleaned_ref = clean_drama_name(ref_name)
        
        if not cleaned_ref:
            continue
        
        # Calculate similarity
        similarity = calculate_similarity(cleaned_target, cleaned_ref)
        
        # Check for exact match after cleaning
        if cleaned_target == cleaned_ref:
            return ref_name, 1.0, idx
        
        # Track best match
        if similarity > best_score:
            best_score = similarity
            best_match = ref_name
            best_idx = idx
    
    return best_match, best_score, best_idx


def match_and_fill_genres():
    """Main function to match dramas and fill missing genres"""
    
    print("="*80)
    print("           GENRE MATCHER - Fill Missing Genres from Reference CSV")
    print("="*80)
    print()
    
    # Load main CSV
    try:
        main_df = pd.read_csv(MAIN_CSV)
        print(f"‚úì Loaded main CSV: {len(main_df)} dramas")
    except FileNotFoundError:
        print(f"‚úó ERROR: Main CSV '{MAIN_CSV}' not found!")
        return
    except Exception as e:
        print(f"‚úó ERROR loading main CSV: {e}")
        return
    
    # Load reference CSV
    try:
        ref_df = pd.read_csv(REFERENCE_CSV)
        print(f"‚úì Loaded reference CSV: {len(ref_df)} dramas")
    except FileNotFoundError:
        print(f"‚úó ERROR: Reference CSV '{REFERENCE_CSV}' not found!")
        return
    except Exception as e:
        print(f"‚úó ERROR loading reference CSV: {e}")
        return
    
    # Validate columns
    if MAIN_DRAMA_COLUMN not in main_df.columns:
        print(f"‚úó ERROR: Column '{MAIN_DRAMA_COLUMN}' not found in main CSV!")
        print(f"Available columns: {', '.join(main_df.columns)}")
        return
    
    if REFERENCE_DRAMA_COLUMN not in ref_df.columns:
        print(f"‚úó ERROR: Column '{REFERENCE_DRAMA_COLUMN}' not found in reference CSV!")
        print(f"Available columns: {', '.join(ref_df.columns)}")
        return
    
    if REFERENCE_GENRE_COLUMN not in ref_df.columns:
        print(f"‚úó ERROR: Column '{REFERENCE_GENRE_COLUMN}' not found in reference CSV!")
        print(f"Available columns: {', '.join(ref_df.columns)}")
        return
    
    # Ensure Genre columns exist in main CSV
    if 'Genre' not in main_df.columns:
        main_df['Genre'] = None
    if 'Genre_Source' not in main_df.columns:
        main_df['Genre_Source'] = None
    
    # Create backup
    backup_path = MAIN_CSV.replace('.csv', '_before_matching.csv')
    main_df.to_csv(backup_path, index=False)
    print(f"‚úì Backup created: {backup_path}")
    
    # Find dramas with missing genres
    missing_mask = (main_df['Genre'].isna()) | (main_df['Genre'] == 'Not Found') | (main_df['Genre'] == '')
    missing_dramas = main_df[missing_mask]
    
    print(f"\nüìä Analysis:")
    print(f"   Total dramas in main CSV: {len(main_df)}")
    print(f"   Dramas with genres: {len(main_df) - len(missing_dramas)}")
    print(f"   Dramas missing genres: {len(missing_dramas)}")
    print(f"   Reference dramas available: {len(ref_df)}")
    
    print(f"\n{'='*80}")
    print(f"Starting matching process (Similarity threshold: {SIMILARITY_THRESHOLD*100}%)...")
    print(f"{'='*80}\n")
    
    # Match and fill
    matched_count = 0
    not_matched_count = 0
    matches_list = []
    
    for idx, row in missing_dramas.iterrows():
        drama_name = row[MAIN_DRAMA_COLUMN]
        
        print(f"[{matched_count + not_matched_count + 1}/{len(missing_dramas)}] Matching: {drama_name}")
        
        # Find best match in reference CSV
        matched_name, similarity, ref_idx = find_best_match(
            drama_name, ref_df, REFERENCE_DRAMA_COLUMN
        )
        
        if similarity >= SIMILARITY_THRESHOLD and ref_idx is not None:
            # Get genre from reference
            genre = ref_df.loc[ref_idx, REFERENCE_GENRE_COLUMN]
            
            if pd.notna(genre) and genre not in ['', 'Not Found']:
                # Update main dataframe
                main_df.loc[idx, 'Genre'] = genre
                main_df.loc[idx, 'Genre_Source'] = 'Reference CSV'
                
                matched_count += 1
                print(f"   ‚úÖ MATCHED ({similarity*100:.1f}%): '{matched_name}'")
                print(f"      Genre: {genre}")
                
                # Store match info
                matches_list.append({
                    'Original': drama_name,
                    'Matched': matched_name,
                    'Similarity': f"{similarity*100:.1f}%",
                    'Genre': genre
                })
            else:
                not_matched_count += 1
                print(f"   ‚ùå Match found but no genre available")
        else:
            not_matched_count += 1
            if matched_name:
                print(f"   ‚ùå Best match: '{matched_name}' ({similarity*100:.1f}%) - Below threshold")
            else:
                print(f"   ‚ùå No match found")
    
    # Save updated CSV
    main_df.to_csv(OUTPUT_CSV, index=False, encoding='utf-8-sig')
    
    # Create detailed match report
    if matches_list:
        match_report_df = pd.DataFrame(matches_list)
        report_path = 'genre_match_report.csv'
        match_report_df.to_csv(report_path, index=False, encoding='utf-8-sig')
    
    # Print final report
    print(f"\n{'='*80}")
    print(f"                          MATCHING REPORT")
    print(f"{'='*80}")
    print(f"\nüìä Results:")
    print(f"   Dramas processed:        {len(missing_dramas)}")
    print(f"   Successfully matched:    {matched_count} ({matched_count/len(missing_dramas)*100:.1f}%)")
    print(f"   Not matched:             {not_matched_count} ({not_matched_count/len(missing_dramas)*100:.1f}%)")
    
    print(f"\nüìÅ Files created:")
    print(f"   ‚úì Updated CSV: {OUTPUT_CSV}")
    print(f"   ‚úì Backup: {backup_path}")
    if matches_list:
        print(f"   ‚úì Match report: {report_path}")
    
    # Show updated statistics
    final_missing = main_df[(main_df['Genre'].isna()) | (main_df['Genre'] == 'Not Found') | (main_df['Genre'] == '')]
    print(f"\nüìà Final Statistics:")
    print(f"   Total dramas: {len(main_df)}")
    print(f"   With genres: {len(main_df) - len(final_missing)} ({(len(main_df) - len(final_missing))/len(main_df)*100:.1f}%)")
    print(f"   Still missing: {len(final_missing)} ({len(final_missing)/len(main_df)*100:.1f}%)")
    
    # Show sample matches
    if matches_list:
        print(f"\n‚ú® Sample Matches (first 5):")
        for i, match in enumerate(matches_list[:5], 1):
            print(f"   {i}. '{match['Original']}' ‚Üí '{match['Matched']}' ({match['Similarity']})")
            print(f"      Genre: {match['Genre']}")
    
    # Show remaining not found dramas
    if len(final_missing) > 0:
        remaining = final_missing[MAIN_DRAMA_COLUMN].head(100).tolist()
        print(f"\n‚ö†Ô∏è  Still missing genres (first 10):")
        for drama in remaining:
            print(f"   ‚Ä¢ {drama}")
    
    print(f"\n{'='*80}")
    print(f"‚úÖ Matching completed successfully!")
    print(f"{'='*80}")


if __name__ == "__main__":
    try:
        match_and_fill_genres()
    except KeyboardInterrupt:
        print("\n\n‚ö†Ô∏è  Process interrupted by user")
    except Exception as e:
        print(f"\n\n‚ùå Unexpected error: {e}")
        import traceback
        traceback.print_exc()

           GENRE MATCHER - Fill Missing Genres from Reference CSV

‚úó ERROR: Main CSV 'archive/korean_drama_with_genres.csv' not found!


<h1> Change ID tag </h1>

In [105]:
INPUT_CSV = 'korean_drama_complete.csv'  # Your input CSV file
OUTPUT_CSV = 'korean_drama_final.csv'  # Output file with integer IDs
ID_COLUMN = 'kdrama_id'  # Name of the ID column to convert
START_ID = 1

In [106]:
try:
    df = pd.read_csv(INPUT_CSV)
    print(f"‚úì Loaded CSV: {len(df)} dramas")
except FileNotFoundError:
    print(f"‚úó ERROR: File '{INPUT_CSV}' not found!")
    
except Exception as e:
    print(f"‚úó ERROR loading CSV: {e}")
    
    

old_ids = df[ID_COLUMN].tolist()
new_ids = list(range(START_ID, START_ID + len(df)))
    
# Create mapping dictionary for reference
id_mapping = {old: new for old, new in zip(old_ids, new_ids)}
    
# Replace IDs
print(f"\nüîÑ Converting {len(df)} IDs to integers...")
df[ID_COLUMN] = new_ids
    
# Save updated CSV
df.to_csv(OUTPUT_CSV, index=False, encoding='utf-8-sig')
    

‚úì Loaded CSV: 1752 dramas

üîÑ Converting 1752 IDs to integers...


In [107]:
df = df[~df['Genre'].isin(['Not Found'])]


In [108]:
df['Genre'].value_counts()

Drama, Romance              224
Drama, Comedy, Romance      134
Drama                        57
Drama, Family, Romance       43
Comedy, Romance              40
                           ... 
Romance, Legal                1
Comedy, Mystery, History      1
Comedy, Fantasy, History      1
Drama, Action, Romance        1
Romance, Music                1
Name: Genre, Length: 243, dtype: int64

In [109]:
df[ID_COLUMN] = range(1, len(df) + 1)

In [110]:
df = df.drop(['ID','Genre_Source'], axis = 1)

KeyError: "['ID'] not found in axis"

In [None]:
df.to_csv(OUTPUT_CSV, index=False, encoding='utf-8-sig')

In [None]:
df.info()

In [None]:
df_2 = pd.read_csv('item.csv')

In [None]:
df_2.head()

In [None]:
df_merged = pd.merge(
    df_2,             # Left DataFrame (keep all rows)
    df[['drama_name','kdrama_id', 'Genre','rank','pop']], # Right DataFrame (select only needed columns)
    left_on='title',        # Column from df_reviews
    right_on='drama_name',            # Join key
    how='left'              # Type of join
)

In [None]:
df_merged.head(10)

In [None]:
df_merged.info()

In [None]:
df_null_matches = df_merged[df_merged['drama_name'].isna()]
print(df_null_matches['title'].unique())


In [None]:
df_clean = df_merged.dropna(subset=['kdrama_id'])

In [None]:
df_clean.shape

In [None]:
df_clean.head(10)

In [None]:
df_clean.drop(['title','user_id'], axis = 1)

In [None]:
df_clean = df_clean[['kdrama_id', 'drama_name', 'rank', 'pop', 'Genre']]

In [112]:
df_clean.head(30)

Unnamed: 0,kdrama_id,drama_name,rank,pop,Comedy,Crime,Drama,Drama.1,Law,Life,...,Music,Mystery,Psychological,Romance,Science-Fiction,Sports,Supernatural,Thriller,Travel,War
0,1.0,Sing My Crush,1484.0,2238.0,0,0,0,0,0,0,...,1,0,0,1,0,0,0,0,0,0
1,32.0,Happy Merry Ending,7632.0,1366.0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
3,51.0,Our Dating Sim,2185.0,569.0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
4,80.0,The Director Who Buys Me Dinner,6529.0,679.0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
5,84.0,Unlock My Boss,2117.0,1088.0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
6,114.0,Roommates of Poongduck 304,2870.0,570.0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
7,127.0,The Golden Spoon,1903.0,658.0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,1,0,0
8,150.0,Big Mouth,395.0,163.0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
9,203.0,Blueming,1715.0,367.0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
10,223.0,Grid,5507.0,1195.0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,1,0,0


In [115]:
df_clean.columns = df_clean.columns.str.strip()


df_clean = df_clean.groupby(df_clean.columns, axis=1).max()

In [119]:
df_clean.head(23)

Unnamed: 0,Action,Adult,Adventure,Anime,Comedy,Crime,DIY,Drama,Espionage,Family,...,Sports,Supernatural,Thriller,Travel,War,Youth,drama_name,kdrama_id,pop,rank
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,Sing My Crush,1.0,2238.0,1484.0
1,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,Happy Merry Ending,32.0,1366.0,7632.0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,Our Dating Sim,51.0,569.0,2185.0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,The Director Who Buys Me Dinner,80.0,679.0,6529.0
5,0,0,0,0,1,0,0,0,0,0,...,0,0,1,0,0,0,Unlock My Boss,84.0,1088.0,2117.0
6,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,Roommates of Poongduck 304,114.0,570.0,2870.0
7,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,The Golden Spoon,127.0,658.0,1903.0
8,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,Big Mouth,150.0,163.0,395.0
9,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,Blueming,203.0,367.0,1715.0
10,0,0,0,0,0,0,0,1,0,0,...,0,0,1,0,0,0,Grid,223.0,1195.0,5507.0


In [117]:
df_clean.to_csv('final_item.csv', index=False, encoding='utf-8-sig')

In [130]:
df_clean = df_clean.reset_index()

In [133]:
print(df_clean.iloc[:24, :30])

    index  Action  Adult  Adventure  Anime  Comedy  Crime  DIY  Drama  \
0       0       0      0          0      0       0      0    0      0   
1       1       0      0          0      0       0      0    0      1   
2       3       0      0          0      0       0      0    0      0   
3       4       0      0          0      0       0      0    0      0   
4       5       0      0          0      0       1      0    0      0   
5       6       0      0          0      0       0      0    0      1   
6       7       0      0          0      0       0      0    0      0   
7       8       0      0          0      0       1      0    0      0   
8       9       0      0          0      0       0      0    0      1   
9      10       0      0          0      0       0      0    0      1   
10     11       0      0          0      0       0      0    0      1   
11     12       0      0          0      0       0      0    0      1   
12     13       0      0          0      0       0 