In [1]:
import pandas as pd
import numpy as np

In [33]:
df = pd.read_csv(r"C:\Users\santh\OneDrive\Desktop\DS\Web Scrapping\Raw_Data_anime.csv")

In [34]:
df

Unnamed: 0.1,Unnamed: 0,Titles,Tv,Ep,SD,Ed,Ratings,Genres,Source,Demographic,Producers
0,0,Fullmetal Alchemist: Brotherhood,TV,64 eps,Apr 2009,Jul 2010,9.09,"['Action', 'Adventure', 'Drama', 'Fantasy', 'M...",Manga,Shounen,"['Aniplex', 'Square Enix', 'Mainichi Broadcast..."
1,1,Steins;Gate,TV,24 eps,Apr 2011,Sep 2011,9.07,"['Drama', 'Sci-Fi', 'Suspense', 'Psychological...",Drama,Not specified,"['Frontier Works', 'Media Factory', 'Kadokawa ..."
2,2,Gintama°,TV,51 eps,Apr 2015,Mar 2016,9.06,"['Action', 'Comedy', 'Sci-Fi', 'Gag Humor', 'H...",Manga,Shounen,"['TV Tokyo', 'Aniplex', 'Dentsu']"
3,3,Shingeki no Kyojin Season 3 Part 2,TV,10 eps,Apr 2019,Jul 2019,9.05,"['Action', 'Drama', 'Suspense', 'Gore', 'Milit...",Manga,Shounen,"['Production I.G', 'Dentsu', 'Mainichi Broadca..."
4,4,Gintama: The Final,Movie,,Jan 2021,Jan 2021,9.04,"['Action', 'Comedy', 'Drama', 'Sci-Fi', 'Gag H...",Manga,Shounen,"['TV Tokyo', 'Aniplex', 'Dentsu', 'Bandai', 'W..."
...,...,...,...,...,...,...,...,...,...,...,...
395,395,Ashita no Joe 2,TV,47 eps,Oct 1980,Aug 1981,8.72,"['Drama', 'Sports', 'Combat Sports', 'Shounen']",Manga,Shounen,['Annapuru']
396,396,Shouwa Genroku Rakugo Shinjuu: Sukeroku Futata...,TV,12 eps,Jan 2017,Mar 2017,8.71,"['Drama', 'Adult Cast', 'Historical', 'Love Po...",Manga,Josei,"['Mainichi Broadcasting System', 'Kodansha', '..."
397,397,Mob Psycho 100 III,TV,12 eps,Oct 2022,Dec 2022,8.71,"['Action', 'Comedy', 'Supernatural', 'Super Po...",Web manga,Not specified,"['Shogakukan-Shueisha Productions', 'Warner Br..."
398,398,Rurouni Kenshin: Meiji Kenkaku Romantan - Tsui...,,,Feb 1999,Sep 1999,8.70,"['Action', 'Drama', 'Romance', 'Adult Cast', '...",Manga,Shounen,['Aniplex']


# About this dataset

This dataset contains 400 anime records scraped from the source site.  
Each row represents an anime and includes the following fields:

- `Titles`: anime title (string)
- `Tv`: format type (TV, Movie, OVA, etc.)
- `Ep`: episode count 
- `SD`: start date / season 
- `Ed`: end date 
- `Ratings`: numeric rating (float)
- `Genres`: list-like string of genres 
- `Source`: source material (Manga, Original, Light novel, etc.)
- `Demographic`: target demographic (Shounen, Seinen, etc.)
- `Producers`: list-like string of producers 




In [35]:
df.columns

Index(['Unnamed: 0', 'Titles', 'Tv', 'Ep', 'SD', 'Ed', 'Ratings', 'Genres',
       'Source', 'Demographic', 'Producers'],
      dtype='object')

### summary of data types & missing values

In [36]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Unnamed: 0   400 non-null    int64  
 1   Titles       400 non-null    object 
 2   Tv           374 non-null    object 
 3   Ep           276 non-null    object 
 4   SD           400 non-null    object 
 5   Ed           386 non-null    object 
 6   Ratings      400 non-null    float64
 7   Genres       400 non-null    object 
 8   Source       400 non-null    object 
 9   Demographic  400 non-null    object 
 10  Producers    400 non-null    object 
dtypes: float64(1), int64(1), object(9)
memory usage: 34.5+ KB


In [37]:
df = df.drop(columns=['Unnamed: 0'])

In [38]:
df = df.reset_index(drop=True)

In [39]:
###numeric stats (like Ratings)

In [40]:
df.describe()

Unnamed: 0,Ratings
count,400.0
mean,8.850875
std,0.104344
min,8.7
25%,8.76
50%,8.82
75%,8.93
max,9.09


### unique values for each categorical column

In [42]:
# df['Tv'].value_counts()
# df['Source'].value_counts()
# df['Demographic'].value_counts()


In [43]:
for col in ['Tv','Source','Demographic']:
    print(f"\nColumn: {col}")
    print(df[col].unique())



Column: Tv
['TV' 'Movie' nan]

Column: Source
['Manga' 'Drama' 'Light novel' 'Action' 'Award Winning' 'Web manga'
 '4-koma manga' 'Adventure']

Column: Demographic
['Shounen' 'Not specified' 'Seinen' 'Shoujo' 'Josei']


In [44]:
print("Unique titles:", df['Titles'].nunique())
print("Unique genres strings:", df['Genres'].nunique())
print("Unique producers strings:", df['Producers'].nunique())


Unique titles: 57
Unique genres strings: 41
Unique producers strings: 49


In [45]:
# Missing values count
missing_count = df.isnull().sum()

# Missing values percentage
missing_pct = (df.isnull().mean() * 100).round(2)

# Combine into one table
missing_table = pd.DataFrame({
    'Missing Values': missing_count,
    'Missing %': missing_pct
})

missing_table


Unnamed: 0,Missing Values,Missing %
Titles,0,0.0
Tv,26,6.5
Ep,124,31.0
SD,0,0.0
Ed,14,3.5
Ratings,0,0.0
Genres,0,0.0
Source,0,0.0
Demographic,0,0.0
Producers,0,0.0


In [46]:

# identify all object (text) columns
text_cols = df.select_dtypes(include='object').columns

# clean each text column
for col in text_cols:
    df[col] = df[col].apply(
        lambda x: str(x).replace('\xa0', ' ').strip() if pd.notna(x) else np.nan
    )



In [47]:
df.head()

Unnamed: 0,Titles,Tv,Ep,SD,Ed,Ratings,Genres,Source,Demographic,Producers
0,Fullmetal Alchemist: Brotherhood,TV,64 eps,Apr 2009,Jul 2010,9.09,"['Action', 'Adventure', 'Drama', 'Fantasy', 'M...",Manga,Shounen,"['Aniplex', 'Square Enix', 'Mainichi Broadcast..."
1,Steins;Gate,TV,24 eps,Apr 2011,Sep 2011,9.07,"['Drama', 'Sci-Fi', 'Suspense', 'Psychological...",Drama,Not specified,"['Frontier Works', 'Media Factory', 'Kadokawa ..."
2,Gintama°,TV,51 eps,Apr 2015,Mar 2016,9.06,"['Action', 'Comedy', 'Sci-Fi', 'Gag Humor', 'H...",Manga,Shounen,"['TV Tokyo', 'Aniplex', 'Dentsu']"
3,Shingeki no Kyojin Season 3 Part 2,TV,10 eps,Apr 2019,Jul 2019,9.05,"['Action', 'Drama', 'Suspense', 'Gore', 'Milit...",Manga,Shounen,"['Production I.G', 'Dentsu', 'Mainichi Broadca..."
4,Gintama: The Final,Movie,,Jan 2021,Jan 2021,9.04,"['Action', 'Comedy', 'Drama', 'Sci-Fi', 'Gag H...",Manga,Shounen,"['TV Tokyo', 'Aniplex', 'Dentsu', 'Bandai', 'W..."


In [48]:
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Titles       400 non-null    object 
 1   Tv           374 non-null    object 
 2   Ep           276 non-null    object 
 3   SD           400 non-null    object 
 4   Ed           386 non-null    object 
 5   Ratings      400 non-null    float64
 6   Genres       400 non-null    object 
 7   Source       400 non-null    object 
 8   Demographic  400 non-null    object 
 9   Producers    400 non-null    object 
dtypes: float64(1), object(9)
memory usage: 31.4+ KB


In [49]:
df.isna().sum()

Titles           0
Tv              26
Ep             124
SD               0
Ed              14
Ratings          0
Genres           0
Source           0
Demographic      0
Producers        0
dtype: int64

In [50]:
# Fill missing in categorical columns with 'Unknown' or 'Ongoing'

df['Tv'] = df['Tv'].fillna('Unknown')
df['Ed'] = df['Ed'].fillna('Ongoing')

# For Episodes, either keep NaN or fill with 'Unknown'
df['Ep'] = df['Ep'].fillna('Unknown')


In [51]:

df.isna().sum()


Titles         0
Tv             0
Ep             0
SD             0
Ed             0
Ratings        0
Genres         0
Source         0
Demographic    0
Producers      0
dtype: int64

In [53]:
df.head()

Unnamed: 0,Titles,Tv,Ep,SD,Ed,Ratings,Genres,Source,Demographic,Producers
0,Fullmetal Alchemist: Brotherhood,TV,64 eps,Apr 2009,Jul 2010,9.09,"['Action', 'Adventure', 'Drama', 'Fantasy', 'M...",Manga,Shounen,"['Aniplex', 'Square Enix', 'Mainichi Broadcast..."
1,Steins;Gate,TV,24 eps,Apr 2011,Sep 2011,9.07,"['Drama', 'Sci-Fi', 'Suspense', 'Psychological...",Drama,Not specified,"['Frontier Works', 'Media Factory', 'Kadokawa ..."
2,Gintama°,TV,51 eps,Apr 2015,Mar 2016,9.06,"['Action', 'Comedy', 'Sci-Fi', 'Gag Humor', 'H...",Manga,Shounen,"['TV Tokyo', 'Aniplex', 'Dentsu']"
3,Shingeki no Kyojin Season 3 Part 2,TV,10 eps,Apr 2019,Jul 2019,9.05,"['Action', 'Drama', 'Suspense', 'Gore', 'Milit...",Manga,Shounen,"['Production I.G', 'Dentsu', 'Mainichi Broadca..."
4,Gintama: The Final,Movie,Unknown,Jan 2021,Jan 2021,9.04,"['Action', 'Comedy', 'Drama', 'Sci-Fi', 'Gag H...",Manga,Shounen,"['TV Tokyo', 'Aniplex', 'Dentsu', 'Bandai', 'W..."


In [54]:
df['Ep_num'] = pd.to_numeric(df["Ep"], errors = 'coerce')

In [55]:

print(df[['Ep', 'Ep_num']].head(15))


print("\nMissing after conversion:", df['Ep_num'].isna().sum())

         Ep  Ep_num
0    64 eps     NaN
1    24 eps     NaN
2    51 eps     NaN
3    10 eps     NaN
4   Unknown     NaN
5    51 eps     NaN
6   Unknown     NaN
7   Unknown     NaN
8    13 eps     NaN
9    13 eps     NaN
10   13 eps     NaN
11   12 eps     NaN
12   13 eps     NaN
13  Unknown     NaN
14  Unknown     NaN

Missing after conversion: 400


In [56]:
df

Unnamed: 0,Titles,Tv,Ep,SD,Ed,Ratings,Genres,Source,Demographic,Producers,Ep_num
0,Fullmetal Alchemist: Brotherhood,TV,64 eps,Apr 2009,Jul 2010,9.09,"['Action', 'Adventure', 'Drama', 'Fantasy', 'M...",Manga,Shounen,"['Aniplex', 'Square Enix', 'Mainichi Broadcast...",
1,Steins;Gate,TV,24 eps,Apr 2011,Sep 2011,9.07,"['Drama', 'Sci-Fi', 'Suspense', 'Psychological...",Drama,Not specified,"['Frontier Works', 'Media Factory', 'Kadokawa ...",
2,Gintama°,TV,51 eps,Apr 2015,Mar 2016,9.06,"['Action', 'Comedy', 'Sci-Fi', 'Gag Humor', 'H...",Manga,Shounen,"['TV Tokyo', 'Aniplex', 'Dentsu']",
3,Shingeki no Kyojin Season 3 Part 2,TV,10 eps,Apr 2019,Jul 2019,9.05,"['Action', 'Drama', 'Suspense', 'Gore', 'Milit...",Manga,Shounen,"['Production I.G', 'Dentsu', 'Mainichi Broadca...",
4,Gintama: The Final,Movie,Unknown,Jan 2021,Jan 2021,9.04,"['Action', 'Comedy', 'Drama', 'Sci-Fi', 'Gag H...",Manga,Shounen,"['TV Tokyo', 'Aniplex', 'Dentsu', 'Bandai', 'W...",
...,...,...,...,...,...,...,...,...,...,...,...
395,Ashita no Joe 2,TV,47 eps,Oct 1980,Aug 1981,8.72,"['Drama', 'Sports', 'Combat Sports', 'Shounen']",Manga,Shounen,['Annapuru'],
396,Shouwa Genroku Rakugo Shinjuu: Sukeroku Futata...,TV,12 eps,Jan 2017,Mar 2017,8.71,"['Drama', 'Adult Cast', 'Historical', 'Love Po...",Manga,Josei,"['Mainichi Broadcasting System', 'Kodansha', '...",
397,Mob Psycho 100 III,TV,12 eps,Oct 2022,Dec 2022,8.71,"['Action', 'Comedy', 'Supernatural', 'Super Po...",Web manga,Not specified,"['Shogakukan-Shueisha Productions', 'Warner Br...",
398,Rurouni Kenshin: Meiji Kenkaku Romantan - Tsui...,Unknown,Unknown,Feb 1999,Sep 1999,8.70,"['Action', 'Drama', 'Romance', 'Adult Cast', '...",Manga,Shounen,['Aniplex'],


In [57]:

df['Ep_clean'] = df['Ep'].str.replace('eps', '', regex=False).str.strip()

df['Ep_num'] = pd.to_numeric(df['Ep_clean'], errors='coerce')

print(df[['Ep', 'Ep_clean', 'Ep_num']].head(15))

print("\nMissing after conversion:", df['Ep_num'].isna().sum())


         Ep Ep_clean  Ep_num
0    64 eps       64    64.0
1    24 eps       24    24.0
2    51 eps       51    51.0
3    10 eps       10    10.0
4   Unknown  Unknown     NaN
5    51 eps       51    51.0
6   Unknown  Unknown     NaN
7   Unknown  Unknown     NaN
8    13 eps       13    13.0
9    13 eps       13    13.0
10   13 eps       13    13.0
11   12 eps       12    12.0
12   13 eps       13    13.0
13  Unknown  Unknown     NaN
14  Unknown  Unknown     NaN

Missing after conversion: 124


In [58]:
df['Episodes_Label'] = df['Ep_num'].fillna('Unknown')


In [62]:
#df

In [60]:
# keep Ep_num (numeric) and optionally Episodes_Label, drop original Ep & Ep_clean
df.drop(columns=['Ep', 'Ep_clean'], inplace=True, errors='ignore')

# check columns now

df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 11 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Titles          400 non-null    object 
 1   Tv              400 non-null    object 
 2   SD              400 non-null    object 
 3   Ed              400 non-null    object 
 4   Ratings         400 non-null    float64
 5   Genres          400 non-null    object 
 6   Source          400 non-null    object 
 7   Demographic     400 non-null    object 
 8   Producers       400 non-null    object 
 9   Ep_num          276 non-null    float64
 10  Episodes_Label  400 non-null    object 
dtypes: float64(2), object(9)
memory usage: 34.5+ KB


In [61]:
df.head()

Unnamed: 0,Titles,Tv,SD,Ed,Ratings,Genres,Source,Demographic,Producers,Ep_num,Episodes_Label
0,Fullmetal Alchemist: Brotherhood,TV,Apr 2009,Jul 2010,9.09,"['Action', 'Adventure', 'Drama', 'Fantasy', 'M...",Manga,Shounen,"['Aniplex', 'Square Enix', 'Mainichi Broadcast...",64.0,64.0
1,Steins;Gate,TV,Apr 2011,Sep 2011,9.07,"['Drama', 'Sci-Fi', 'Suspense', 'Psychological...",Drama,Not specified,"['Frontier Works', 'Media Factory', 'Kadokawa ...",24.0,24.0
2,Gintama°,TV,Apr 2015,Mar 2016,9.06,"['Action', 'Comedy', 'Sci-Fi', 'Gag Humor', 'H...",Manga,Shounen,"['TV Tokyo', 'Aniplex', 'Dentsu']",51.0,51.0
3,Shingeki no Kyojin Season 3 Part 2,TV,Apr 2019,Jul 2019,9.05,"['Action', 'Drama', 'Suspense', 'Gore', 'Milit...",Manga,Shounen,"['Production I.G', 'Dentsu', 'Mainichi Broadca...",10.0,10.0
4,Gintama: The Final,Movie,Jan 2021,Jan 2021,9.04,"['Action', 'Comedy', 'Drama', 'Sci-Fi', 'Gag H...",Manga,Shounen,"['TV Tokyo', 'Aniplex', 'Dentsu', 'Bandai', 'W...",,Unknown


In [63]:
import ast

# Convert the string representation of list into an actual Python list
df['Genres_list'] = df['Genres'].apply(lambda x: ast.literal_eval(x) if pd.notna(x) else [])

# Quick check first rows
df[['Genres', 'Genres_list']].head()


Unnamed: 0,Genres,Genres_list
0,"['Action', 'Adventure', 'Drama', 'Fantasy', 'M...","[Action, Adventure, Drama, Fantasy, Military, ..."
1,"['Drama', 'Sci-Fi', 'Suspense', 'Psychological...","[Drama, Sci-Fi, Suspense, Psychological, Time ..."
2,"['Action', 'Comedy', 'Sci-Fi', 'Gag Humor', 'H...","[Action, Comedy, Sci-Fi, Gag Humor, Historical..."
3,"['Action', 'Drama', 'Suspense', 'Gore', 'Milit...","[Action, Drama, Suspense, Gore, Military, Surv..."
4,"['Action', 'Comedy', 'Drama', 'Sci-Fi', 'Gag H...","[Action, Comedy, Drama, Sci-Fi, Gag Humor, His..."


In [64]:
df['Producers_list'] = df['Producers'].apply(lambda x: ast.literal_eval(x) if pd.notna(x) else [])


In [65]:
df

Unnamed: 0,Titles,Tv,SD,Ed,Ratings,Genres,Source,Demographic,Producers,Ep_num,Episodes_Label,Genres_list,Producers_list
0,Fullmetal Alchemist: Brotherhood,TV,Apr 2009,Jul 2010,9.09,"['Action', 'Adventure', 'Drama', 'Fantasy', 'M...",Manga,Shounen,"['Aniplex', 'Square Enix', 'Mainichi Broadcast...",64.0,64.0,"[Action, Adventure, Drama, Fantasy, Military, ...","[Aniplex, Square Enix, Mainichi Broadcasting S..."
1,Steins;Gate,TV,Apr 2011,Sep 2011,9.07,"['Drama', 'Sci-Fi', 'Suspense', 'Psychological...",Drama,Not specified,"['Frontier Works', 'Media Factory', 'Kadokawa ...",24.0,24.0,"[Drama, Sci-Fi, Suspense, Psychological, Time ...","[Frontier Works, Media Factory, Kadokawa Shote..."
2,Gintama°,TV,Apr 2015,Mar 2016,9.06,"['Action', 'Comedy', 'Sci-Fi', 'Gag Humor', 'H...",Manga,Shounen,"['TV Tokyo', 'Aniplex', 'Dentsu']",51.0,51.0,"[Action, Comedy, Sci-Fi, Gag Humor, Historical...","[TV Tokyo, Aniplex, Dentsu]"
3,Shingeki no Kyojin Season 3 Part 2,TV,Apr 2019,Jul 2019,9.05,"['Action', 'Drama', 'Suspense', 'Gore', 'Milit...",Manga,Shounen,"['Production I.G', 'Dentsu', 'Mainichi Broadca...",10.0,10.0,"[Action, Drama, Suspense, Gore, Military, Surv...","[Production I.G, Dentsu, Mainichi Broadcasting..."
4,Gintama: The Final,Movie,Jan 2021,Jan 2021,9.04,"['Action', 'Comedy', 'Drama', 'Sci-Fi', 'Gag H...",Manga,Shounen,"['TV Tokyo', 'Aniplex', 'Dentsu', 'Bandai', 'W...",,Unknown,"[Action, Comedy, Drama, Sci-Fi, Gag Humor, His...","[TV Tokyo, Aniplex, Dentsu, Bandai, Warner Bro..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,Ashita no Joe 2,TV,Oct 1980,Aug 1981,8.72,"['Drama', 'Sports', 'Combat Sports', 'Shounen']",Manga,Shounen,['Annapuru'],47.0,47.0,"[Drama, Sports, Combat Sports, Shounen]",[Annapuru]
396,Shouwa Genroku Rakugo Shinjuu: Sukeroku Futata...,TV,Jan 2017,Mar 2017,8.71,"['Drama', 'Adult Cast', 'Historical', 'Love Po...",Manga,Josei,"['Mainichi Broadcasting System', 'Kodansha', '...",12.0,12.0,"[Drama, Adult Cast, Historical, Love Polygon, ...","[Mainichi Broadcasting System, Kodansha, DAX P..."
397,Mob Psycho 100 III,TV,Oct 2022,Dec 2022,8.71,"['Action', 'Comedy', 'Supernatural', 'Super Po...",Web manga,Not specified,"['Shogakukan-Shueisha Productions', 'Warner Br...",12.0,12.0,"[Action, Comedy, Supernatural, Super Power]","[Shogakukan-Shueisha Productions, Warner Bros...."
398,Rurouni Kenshin: Meiji Kenkaku Romantan - Tsui...,Unknown,Feb 1999,Sep 1999,8.70,"['Action', 'Drama', 'Romance', 'Adult Cast', '...",Manga,Shounen,['Aniplex'],,Unknown,"[Action, Drama, Romance, Adult Cast, Historica...",[Aniplex]


### Convert SD and Ed to datetime

In [67]:
df['SD_date'] = pd.to_datetime(df['SD'], format='%b %Y', errors='coerce')
df['Ed_date'] = pd.to_datetime(df['Ed'], format='%b %Y', errors='coerce')

df[['SD','SD_date','Ed','Ed_date']].head(10)


Unnamed: 0,SD,SD_date,Ed,Ed_date
0,Apr 2009,2009-04-01,Jul 2010,2010-07-01
1,Apr 2011,2011-04-01,Sep 2011,2011-09-01
2,Apr 2015,2015-04-01,Mar 2016,2016-03-01
3,Apr 2019,2019-04-01,Jul 2019,2019-07-01
4,Jan 2021,2021-01-01,Jan 2021,2021-01-01
5,Apr 2011,2011-04-01,Mar 2012,2012-03-01
6,Oct 2011,2011-10-01,Sep 2014,2014-09-01
7,Jan 1988,1988-01-01,Mar 1997,1997-03-01
8,Oct 2012,2012-10-01,Mar 2013,2013-03-01
9,Oct 2022,2022-10-01,Dec 2022,2022-12-01


In [68]:
# make everything title case or consistent
df['Tv'] = df['Tv'].str.title()
df['Source'] = df['Source'].str.title()
df['Demographic'] = df['Demographic'].str.title()

# optional: replace 'Not Specified' with 'Unknown'
df['Demographic'] = df['Demographic'].replace('Not Specified','Unknown')


In [69]:
df.drop(columns=['SD','Ed','Genres','Producers'], inplace=True)


In [70]:
df

Unnamed: 0,Titles,Tv,Ratings,Source,Demographic,Ep_num,Episodes_Label,Genres_list,Producers_list,SD_date,Ed_date
0,Fullmetal Alchemist: Brotherhood,Tv,9.09,Manga,Shounen,64.0,64.0,"[Action, Adventure, Drama, Fantasy, Military, ...","[Aniplex, Square Enix, Mainichi Broadcasting S...",2009-04-01,2010-07-01
1,Steins;Gate,Tv,9.07,Drama,Unknown,24.0,24.0,"[Drama, Sci-Fi, Suspense, Psychological, Time ...","[Frontier Works, Media Factory, Kadokawa Shote...",2011-04-01,2011-09-01
2,Gintama°,Tv,9.06,Manga,Shounen,51.0,51.0,"[Action, Comedy, Sci-Fi, Gag Humor, Historical...","[TV Tokyo, Aniplex, Dentsu]",2015-04-01,2016-03-01
3,Shingeki no Kyojin Season 3 Part 2,Tv,9.05,Manga,Shounen,10.0,10.0,"[Action, Drama, Suspense, Gore, Military, Surv...","[Production I.G, Dentsu, Mainichi Broadcasting...",2019-04-01,2019-07-01
4,Gintama: The Final,Movie,9.04,Manga,Shounen,,Unknown,"[Action, Comedy, Drama, Sci-Fi, Gag Humor, His...","[TV Tokyo, Aniplex, Dentsu, Bandai, Warner Bro...",2021-01-01,2021-01-01
...,...,...,...,...,...,...,...,...,...,...,...
395,Ashita no Joe 2,Tv,8.72,Manga,Shounen,47.0,47.0,"[Drama, Sports, Combat Sports, Shounen]",[Annapuru],1980-10-01,1981-08-01
396,Shouwa Genroku Rakugo Shinjuu: Sukeroku Futata...,Tv,8.71,Manga,Josei,12.0,12.0,"[Drama, Adult Cast, Historical, Love Polygon, ...","[Mainichi Broadcasting System, Kodansha, DAX P...",2017-01-01,2017-03-01
397,Mob Psycho 100 III,Tv,8.71,Web Manga,Unknown,12.0,12.0,"[Action, Comedy, Supernatural, Super Power]","[Shogakukan-Shueisha Productions, Warner Bros....",2022-10-01,2022-12-01
398,Rurouni Kenshin: Meiji Kenkaku Romantan - Tsui...,Unknown,8.70,Manga,Shounen,,Unknown,"[Action, Drama, Romance, Adult Cast, Historica...",[Aniplex],1999-02-01,1999-09-01


In [77]:
# Save cleaned dataframe to CSV
df.to_csv('Cleaned_Anime_Data_1.csv', index=False)


### 📝 Steps We Performed 

Loaded raw data using pandas.read_csv().

Dropped unnecessary index column (Unnamed: 0).

Standardized string columns (removed extra spaces, consistent case).

Handled missing values:

Converted “Unknown” / “Not specified” in Demographic to NaN.

For movies, filled missing Ep_num with 1.

Split list-like columns (Genres & Producers) into proper Python lists.

Converted dates in SD & Ed columns to datetime objects.

Standardized list entries: stripped spaces & consistent casing for genres/producers.

