References\
https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html \
https://scikit-learn.org/stable/modules/model_evaluation.html

In [32]:
import numpy as np
import pandas as pd
import seaborn as sb
import matplotlib.pyplot as plt

import warnings
warnings.simplefilter(action='ignore',category=FutureWarning)

df = pd.read_csv('cleaned_data.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,anime_id,Name,English name,Other name,Score,Genres,Synopsis,Type,Episodes,...,SynergySP,Arms,Project No.9,David Production,TNK,Telecom Animation Film,White Fox,Wit Studio,Producer,Duration_mins
0,0,1,Cowboy Bebop,Cowboy Bebop,カウボーイビバップ,8.75,"Action, Award Winning, Sci-Fi","Crime is timeless. By the year 2071, humanity ...",TV,26,...,False,False,False,False,False,False,False,False,Bandai Visual,24
1,2,6,Trigun,Trigun,トライガン,8.22,"Action, Adventure, Sci-Fi","Vash the Stampede is the man with a $$60,000,0...",TV,26,...,False,False,False,False,False,False,False,False,none,24
2,3,7,Witch Hunter Robin,Witch Hunter Robin,Witch Hunter ROBIN (ウイッチハンターロビン),7.25,"Action, Drama, Mystery, Supernatural",Robin Sena is a powerful craft user drafted in...,TV,26,...,False,False,False,False,False,False,False,False,Dentsu,25
3,4,8,Bouken Ou Beet,Beet the Vandel Buster,冒険王ビィト,6.94,"Adventure, Fantasy, Supernatural",It is the dark century and the people are suff...,TV,52,...,False,False,False,False,False,False,False,False,Dentsu,23
4,5,15,Eyeshield 21,UNKNOWN,アイシールド21,7.92,Sports,"Shy, reserved, and small-statured, Deimon High...",TV,145,...,False,False,False,False,False,False,False,False,Nihon Ad Systems,23


In [6]:
# set categorical types
category_cols = ['Rating','Type','Status','Source','season','Studio']
int_cols = ['Rank','Episodes','Scored By','Popularity','Favorites','Members','Premier_Month']
float_cols = ['Score']

genres = ['Action', 'Adventure', 'Avant Garde', 'Award Winning', 'Boys Love', 'Comedy', 'Drama', 'Fantasy', 'Girls Love', 'Gourmet', 'Horror', 'Mystery', 'Romance', 'Sci-Fi', 'Slice of Life', 'Sports', 'Supernatural', 'Suspense', 'Ecchi']
studios = ['Toei Animation', 'Sunrise', 'J.C.Staff', 'Madhouse', 'Studio Deen', 'TMS Entertainment', 'Nippon Animation', 'Pierrot', 'A-1 Pictures', 'OLM', 'Tatsunoko Production', 'Production I.G', 'Gonzo', 'Xebec', 'Bones', 'SILVER LINK.', 'Satelight', 'Doga Kobo', 'Shaft', "Brain's Base", 'LIDENFILMS', 'Group TAC', 'MAPPA', 'Shin-Ei Animation', 'feel.', 'Diomedéa', 'AIC', 'Studio Comet', 'Zexcs', 'Kyoto Animation', 'Lerche', 'Tokyo Movie Shinsha', 'P.A. Works', 'Gallop', 'Ashi Productions', 'Studio Hibari', '8bit', 'DLE', 'Tezuka Productions', 'Seven', 'Studio Gokumi', 'Bandai Namco Pictures', 'Gainax', 'Bee Train', 'Kinema Citrus', 'SynergySP', 'Arms', 'Project No.9', 'David Production', 'TNK', 'Telecom Animation Film', 'White Fox', 'Wit Studio']

types = dict([(i,'category') for i in category_cols])
df = df.astype(types)

## Support Vector Classification
### Parts
#### > Functions
#### > Analysis
    >> Genres
    >> Studios
    >> Genres+Studios 
    >> Genres+Studios+Popularity
    >> Members
    >> Genres+Studios+Popularity+Members


### Functions

In [66]:
from sklearn.svm import SVR
# from sklearn.metrics import mean_squared_error, r2_score, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

def train_svm(X_train, y_train):
  # Define and fit the SVR model
  model = SVR()
  model.fit(X_train, y_train)
  return model

def create_df(df_name, features):
    for i in features:
        df_name[i]=df[i]
    return df_name

def evaluate(features_df, score_array):
    # spliting dataset into 0.2 & 0.8
    X_train, X_test, y_train, y_test = train_test_split(features_df, score_array, test_size=0.2, random_state=42)
    model=train_svm(X_train, y_train)
    
    y_pred= model.predict(X_test)

    # metrics evaluated
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    
    # Print metrics
    print("Mean Squared Error (MSE):", mse)
    print("Root Mean Squared Error (RMSE):", rmse)
    print("Mean Absolute Error (MAE):", mae)
    print("R-squared score:", r2)
    


In [52]:
#Score array, our indicator for how good an anime is 
target='Score'
score=df[['Score']]
score_array=score.to_numpy().flatten()
print(score_array)

[8.75 8.22 7.25 ... 7.56 6.19 6.09]


### Analysis of Genres

In [53]:
genre_df=pd.DataFrame()
create_df(genre_df, genres)

Unnamed: 0,Action,Adventure,Avant Garde,Award Winning,Boys Love,Comedy,Drama,Fantasy,Girls Love,Gourmet,Horror,Mystery,Romance,Sci-Fi,Slice of Life,Sports,Supernatural,Suspense,Ecchi
0,True,False,False,True,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False
1,True,True,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False
2,True,False,False,False,False,False,True,False,False,False,False,True,False,False,False,False,True,False,False
3,False,True,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,False,False
4,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4276,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False
4277,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
4278,True,True,False,False,False,True,False,True,False,False,False,False,False,False,False,False,False,False,False
4279,False,False,False,False,False,True,False,False,False,False,False,False,True,False,False,False,False,False,False


In [54]:
evaluate(genre_df, score_array)

Mean Squared Error (MSE): 0.5565238686923856
Root Mean Squared Error (RMSE): 0.7460052739038683
Mean Absolute Error (MAE): 0.5706808496449517
R-squared score: 0.14150839394778392


### Analysis of Studios

In [55]:
studios_df=pd.DataFrame()
create_df(studios_df, studios)

Unnamed: 0,Toei Animation,Sunrise,J.C.Staff,Madhouse,Studio Deen,TMS Entertainment,Nippon Animation,Pierrot,A-1 Pictures,OLM,...,Bee Train,Kinema Citrus,SynergySP,Arms,Project No.9,David Production,TNK,Telecom Animation Film,White Fox,Wit Studio
0,False,True,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,True,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,True,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,True,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4276,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4277,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4278,False,False,False,False,False,False,False,False,False,True,...,False,False,False,False,False,False,False,False,False,False
4279,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [56]:
evaluate(studios_df, score_array)

Mean Squared Error (MSE): 0.5403870029686134
Root Mean Squared Error (RMSE): 0.7351101978401697
Mean Absolute Error (MAE): 0.5756193235021845
R-squared score: 0.1664010617221241


### Analysis of Popularity

In [57]:
pop_df=pd.DataFrame()
create_df(pop_df, ['Popularity'])

Unnamed: 0,Popularity
0,43
1,246
2,1795
3,5126
4,1252
...,...
4276,3074
4277,6339
4278,4673
4279,3626


In [58]:
evaluate(pop_df, score_array)

Mean Squared Error (MSE): 0.42818686750383295
Root Mean Squared Error (RMSE): 0.6543598914235444
Mean Absolute Error (MAE): 0.49851227759885836
R-squared score: 0.339480564530794


### Analysis of Genres+Studios

In [59]:
genre_studios=genres+studios
genre_studios_df=pd.DataFrame()
create_df(genre_studios_df, genre_studios)

Unnamed: 0,Action,Adventure,Avant Garde,Award Winning,Boys Love,Comedy,Drama,Fantasy,Girls Love,Gourmet,...,Bee Train,Kinema Citrus,SynergySP,Arms,Project No.9,David Production,TNK,Telecom Animation Film,White Fox,Wit Studio
0,True,False,False,True,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,True,True,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,True,False,False,False,False,False,True,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,True,False,False,False,False,False,True,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4276,False,False,False,False,False,True,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4277,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4278,True,True,False,False,False,True,False,True,False,False,...,False,False,False,False,False,False,False,False,False,False
4279,False,False,False,False,False,True,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [60]:
evaluate(genre_studios_df, score_array)

Mean Squared Error (MSE): 0.49586308490871694
Root Mean Squared Error (RMSE): 0.7041754645745029
Mean Absolute Error (MAE): 0.5340561459914308
R-squared score: 0.23508348861027883


### Analysis of Genres+Studios+Popularity

In [61]:
combined=genres+studios+['Popularity']
combined_df=pd.DataFrame()
create_df(combined_df, combined)

Unnamed: 0,Action,Adventure,Avant Garde,Award Winning,Boys Love,Comedy,Drama,Fantasy,Girls Love,Gourmet,...,Kinema Citrus,SynergySP,Arms,Project No.9,David Production,TNK,Telecom Animation Film,White Fox,Wit Studio,Popularity
0,True,False,False,True,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,43
1,True,True,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,246
2,True,False,False,False,False,False,True,False,False,False,...,False,False,False,False,False,False,False,False,False,1795
3,False,True,False,False,False,False,False,True,False,False,...,False,False,False,False,False,False,False,False,False,5126
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,1252
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4276,False,False,False,False,False,True,False,False,False,False,...,False,False,False,False,False,False,False,False,False,3074
4277,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,6339
4278,True,True,False,False,False,True,False,True,False,False,...,False,False,False,False,False,False,False,False,False,4673
4279,False,False,False,False,False,True,False,False,False,False,...,False,False,False,False,False,False,False,False,False,3626


In [62]:
evaluate(combined_df, score_array)

Mean Squared Error (MSE): 0.42907863232340565
Root Mean Squared Error (RMSE): 0.6550409394254726
Mean Absolute Error (MAE): 0.4994378073440587
R-squared score: 0.3381049315072283


### Analysis of Members

In [63]:
mm_df=pd.DataFrame()
create_df(mm_df, ['Members'])
evaluate(mm_df, score_array) 

Mean Squared Error (MSE): 0.43724651302831746
Root Mean Squared Error (RMSE): 0.6612461818629408
Mean Absolute Error (MAE): 0.5106286966018371
R-squared score: 0.3255051897551301


### Analysis of Genres+Studios+Popularity+Members

In [64]:
combined2=genres+studios+['Popularity']+['Members']
combined2_df=pd.DataFrame()
create_df(combined2_df, combined2)

Unnamed: 0,Action,Adventure,Avant Garde,Award Winning,Boys Love,Comedy,Drama,Fantasy,Girls Love,Gourmet,...,SynergySP,Arms,Project No.9,David Production,TNK,Telecom Animation Film,White Fox,Wit Studio,Popularity,Members
0,True,False,False,True,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,43,1771505
1,True,True,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,246,727252
2,True,False,False,False,False,False,True,False,False,False,...,False,False,False,False,False,False,False,False,1795,111931
3,False,True,False,False,False,False,False,True,False,False,...,False,False,False,False,False,False,False,False,5126,15001
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,1252,177688
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4276,False,False,False,False,False,True,False,False,False,False,...,False,False,False,False,False,False,False,False,3074,44954
4277,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,6339,8943
4278,True,True,False,False,False,True,False,True,False,False,...,False,False,False,False,False,False,False,False,4673,19087
4279,False,False,False,False,False,True,False,False,False,False,...,False,False,False,False,False,False,False,False,3626,32829


In [65]:
evaluate(combined2_df, score_array)

Mean Squared Error (MSE): 0.43278492350855013
Root Mean Squared Error (RMSE): 0.6578639095653068
Mean Absolute Error (MAE): 0.5075732022976975
R-squared score: 0.33238762080228423
