In [1]:
import pandas as pd
import numpy as np
from numpy import mean,std,absolute,arange
from sklearn.linear_model import LinearRegression,Lasso, LassoCV
from sklearn.model_selection import cross_val_score,RepeatedKFold,train_test_split
from sklearn.metrics import mean_absolute_error

In [2]:
animes_df = pd.read_csv("Anime.csv").dropna().reset_index()
animes_df = animes_df[(animes_df["Producers"]!="Unknown") & 
    (animes_df["Producers"]!="Ltd.")]

In [3]:
anime_feature = animes_df[['ID','Title','Popularity','Duration_Minutes',
                          'Rating','Score','Scored_Users','Members','Favorites','Episodes']]

In [4]:
anime_feature

Unnamed: 0,ID,Title,Popularity,Duration_Minutes,Rating,Score,Scored_Users,Members,Favorites,Episodes
0,16498,Shingeki no Kyojin,1,24.0,R - 17+ (violence & profanity),8.531,519803.0,3524109,155695,25.0
1,1535,Death Note,2,23.0,R - 17+ (violence & profanity),8.621,485487.0,3504535,159701,37.0
2,5114,Fullmetal Alchemist: Brotherhood,3,24.0,R - 17+ (violence & profanity),9.131,900398.0,2978455,207772,64.0
3,30276,One Punch Man,4,24.0,R - 17+ (violence & profanity),8.511,19066.0,2879907,59651,12.0
4,11757,Sword Art Online,5,23.0,PG-13 - Teens 13 or older,7.201,990254.0,2813565,64997,25.0
...,...,...,...,...,...,...,...,...,...,...
13009,39939,Emerald no Oka,17567,4.0,G - All Ages,5.791,114.0,199,0,1.0
13010,40058,Arui Tekoteko,17576,2.0,G - All Ages,5.141,108.0,198,0,1.0
13019,52049,Yamanai Ame,17655,4.0,PG-13 - Teens 13 or older,5.991,111.0,194,0,1.0
13023,52638,Fukuro,17677,3.0,R+ - Mild Nudity,5.081,103.0,192,0,1.0


In [5]:
features = anime_feature[['Episodes','Duration_Minutes','Rating']]
features = pd.get_dummies(features, columns = ['Rating'])
scores = anime_feature['Score']
result = pd.DataFrame(columns=["model", "MAE"])

In [6]:
# Linear regression
feature_train, feature_test, score_train, score_test = train_test_split(features, scores, test_size=0.2, random_state=42)
lreg = LinearRegression().fit(feature_train, score_train)
lreg_pred = lreg.predict(feature_test)
result.loc[len(result)] = ["Linear", mean_absolute_error(score_test, lreg_pred)]

In [7]:
# LASSO regression
lasso_model = Lasso(alpha=0.1)
lasso_model.fit(feature_train,score_train)
lasso_pred = lasso_model.predict(feature_test)
result.loc[len(result)] = ["LASSO", mean_absolute_error(score_test, lasso_pred)]

In [8]:
# Ridge model
from sklearn.linear_model import Ridge,RidgeCV
ridge_model = Ridge(alpha=0.1)
ridge_model.fit(feature_train,score_train)
ridge_pred = ridge_model.predict(feature_test)
result.loc[len(result)] = ["Ridge", mean_absolute_error(score_test, ridge_pred)]

In [9]:
# Decision Tree Regressor
from sklearn.tree import DecisionTreeRegressor
regressor = DecisionTreeRegressor(random_state=0)
regressor.fit(feature_train,score_train)
dt_pred = regressor.predict(feature_test)
result.loc[len(result)] = ["Decision Tree", mean_absolute_error(score_test, dt_pred)]

In [10]:
# Random Forest Regressor
from sklearn.ensemble import RandomForestRegressor
rfregressor = RandomForestRegressor(n_estimators = 100, random_state = 0)
rfregressor.fit(feature_train,score_train)
rf_pred = rfregressor.predict(feature_test)
result.loc[len(result)] = ["Random Forest", mean_absolute_error(score_test, rf_pred)]

In [11]:
# Gradient Boosting Regressor
from sklearn.ensemble import GradientBoostingRegressor
gbregressor = GradientBoostingRegressor(n_estimators = 100, random_state = 0)
gbregressor.fit(feature_train,score_train)
gb_pred = gbregressor.predict(feature_test)
result.loc[len(result)] = ["Gradient Boosting", mean_absolute_error(score_test, gb_pred)]

In [12]:
# AdaBoost Regressor
from sklearn.ensemble import AdaBoostRegressor
adareg = AdaBoostRegressor(n_estimators = 100, random_state = 0)
adareg.fit(feature_train,score_train)
ada_pred = adareg.predict(feature_test)
result.loc[len(result)] = ["AdaBoost", mean_absolute_error(score_test, ada_pred)]

In [13]:
# XGBoost Regressor
import xgboost as xg
xgreg = xg.XGBRegressor().fit(feature_train,score_train)
xg_pred = xgreg.predict(feature_test)
result.loc[len(result)] = ["XGBoost", mean_absolute_error(score_test, xg_pred)]

In [14]:
# MLP Regressor
from sklearn.neural_network import MLPRegressor
mlpreg = MLPRegressor().fit(feature_train, score_train)
mlp_pred = mlpreg.predict(feature_test)
result.loc[len(result)] = ["MLP", mean_absolute_error(score_test, mlp_pred)]

In [15]:
result.sort_values(by=["MAE"])

Unnamed: 0,model,MAE
5,Gradient Boosting,0.579141
7,XGBoost,0.59273
4,Random Forest,0.60667
3,Decision Tree,0.649024
0,Linear,0.652007
2,Ridge,0.65201
6,AdaBoost,0.666612
1,LASSO,0.709647
8,MLP,0.937497
