In [1]:
import pandas as pd
import numpy as np
from numpy import mean,std,absolute,arange
from sklearn.linear_model import LinearRegression,Lasso, LassoCV
from sklearn.model_selection import cross_val_score,RepeatedKFold,train_test_split
from sklearn.metrics import mean_absolute_error

In [2]:
animes_df = pd.read_csv("Anime.csv").dropna().reset_index(drop=True)
animes_df['Rating'].unique()

array(['R - 17+ (violence & profanity)', 'PG-13 - Teens 13 or older',
       'R+ - Mild Nudity', 'PG - Children', 'G - All Ages', 'None'],
      dtype=object)

In [3]:
def trans_rating(rating):
    if (rating == "None"):
        return 0
    elif (rating == "PG - Children"):
        return 1
    elif (rating == "G - All Ages"):
        return 2
    elif (rating == "PG-13 - Teens 13 or older"):
        return 3
    elif (rating == "R - 17+ (violence & profanity)"):
        return 4
    elif (rating == "R+ - Mild Nudity"):
        return 5

In [4]:
anime_feature = animes_df[['ID','Title','Popularity','Duration_Minutes',
                          'Rating','Score','Scored_Users','Members','Favorites','Episodes']]
anime_feature['rating_level'] = anime_feature['Rating'].apply(lambda x: trans_rating(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anime_feature['rating_level'] = anime_feature['Rating'].apply(lambda x: trans_rating(x))


In [5]:
features = anime_feature[['Episodes','Duration_Minutes','Rating']]
features = pd.get_dummies(features, columns = ['Rating'])
scores = anime_feature['Score']
result = pd.DataFrame(columns=["model", "MAE"])

In [6]:
# Linear regression
feature_train, feature_test, score_train, score_test = train_test_split(features, scores, test_size=0.2, random_state=42)
lreg = LinearRegression().fit(feature_train, score_train)
lreg_pred = lreg.predict(feature_test)
result.loc[len(result)] = ["Linear", mean_absolute_error(score_test, lreg_pred)]

In [7]:
# LASSO regression
lasso_model = Lasso(alpha=0.1)
lasso_model.fit(feature_train,score_train)
lasso_pred = lasso_model.predict(feature_test)
result.loc[len(result)] = ["LASSO", mean_absolute_error(score_test, lasso_pred)]

In [8]:
# Ridge model
from sklearn.linear_model import Ridge,RidgeCV
ridge_model = Ridge(alpha=0.1)
ridge_model.fit(feature_train,score_train)
ridge_pred = ridge_model.predict(feature_test)
result.loc[len(result)] = ["Ridge", mean_absolute_error(score_test, ridge_pred)]

In [9]:
# Decision Tree Regressor
from sklearn.tree import DecisionTreeRegressor
regressor = DecisionTreeRegressor(random_state=0)
regressor.fit(feature_train,score_train)
dt_pred = regressor.predict(feature_test)
result.loc[len(result)] = ["Decision Tree", mean_absolute_error(score_test, dt_pred)]

In [10]:
# Random Forest Regressor
from sklearn.ensemble import RandomForestRegressor
rfregressor = RandomForestRegressor(n_estimators = 100, random_state = 0)
rfregressor.fit(feature_train,score_train)
rf_pred = rfregressor.predict(feature_test)
result.loc[len(result)] = ["Random Forest", mean_absolute_error(score_test, rf_pred)]

In [11]:
# Gradient Boosting Regressor
from sklearn.ensemble import GradientBoostingRegressor
gbregressor = GradientBoostingRegressor(n_estimators = 100, random_state = 0)
gbregressor.fit(feature_train,score_train)
gb_pred = gbregressor.predict(feature_test)
result.loc[len(result)] = ["Gradient Boosting", mean_absolute_error(score_test, gb_pred)]

In [12]:
# AdaBoost Regressor
from sklearn.ensemble import AdaBoostRegressor
adareg = AdaBoostRegressor(n_estimators = 100, random_state = 0)
adareg.fit(feature_train,score_train)
ada_pred = adareg.predict(feature_test)
result.loc[len(result)] = ["AdaBoost", mean_absolute_error(score_test, ada_pred)]

In [13]:
# XGBoost Regressor
import xgboost as xg
xgreg = xg.XGBRegressor().fit(feature_train,score_train)
xg_pred = xgreg.predict(feature_test)
result.loc[len(result)] = ["XGBoost", mean_absolute_error(score_test, xg_pred)]

In [14]:
# MLP Regressor
from sklearn.neural_network import MLPRegressor
mlpreg = MLPRegressor().fit(feature_train, score_train)
mlp_pred = mlpreg.predict(feature_test)
result.loc[len(result)] = ["MLP", mean_absolute_error(score_test, mlp_pred)]

In [16]:
result.sort_values(by=["MAE"])

Unnamed: 0,model,MAE
5,Gradient Boosting,0.583202
7,XGBoost,0.583274
4,Random Forest,0.597309
8,MLP,0.620529
3,Decision Tree,0.621616
6,AdaBoost,0.639075
0,Linear,0.654228
2,Ridge,0.654228
1,LASSO,0.701447
