In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
# data analysis and wrangling
import pandas as pd
import numpy as np
import random as rnd

In [3]:
# machine learning
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR 
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor

In [4]:
rs_cols = ['user_id', 'movie_id', 'rating', 'unix_timestamp']
df = pd.read_csv('Dataset/ml-100k/ua.base', sep='\t', names=rs_cols, encoding='latin-1')

In [5]:
df.head()

Unnamed: 0,user_id,movie_id,rating,unix_timestamp
0,1,1,5,874965758
1,1,2,3,876893171
2,1,3,4,878542960
3,1,4,3,876893119
4,1,5,3,889751712


In [6]:
df.columns

Index(['user_id', 'movie_id', 'rating', 'unix_timestamp'], dtype='object')

In [7]:
df=df.drop(columns=['unix_timestamp'])

In [8]:
df.columns

Index(['user_id', 'movie_id', 'rating'], dtype='object')

In [9]:
from sklearn import preprocessing
le = preprocessing.LabelEncoder()

In [10]:
df['user_id']=le.fit_transform(df['user_id'].values)

In [11]:
y=df['rating'].values
x=df.loc[:, df.columns != 'rating'].values

In [12]:

X_train, X_test, Y_trian, Y_test= train_test_split(x,y, test_size=0.2, random_state=0)

In [13]:
df.head

<bound method NDFrame.head of        user_id  movie_id  rating
0            0         1       5
1            0         2       3
2            0         3       4
3            0         4       3
4            0         5       3
...        ...       ...     ...
90565      942      1047       2
90566      942      1074       4
90567      942      1188       3
90568      942      1228       3
90569      942      1330       3

[90570 rows x 3 columns]>

In [14]:
X_train, X_test, Y_trian, Y_test= train_test_split(x,y, test_size=0.2, random_state=0)

### Random Forest


In [15]:

randregrssor=RandomForestRegressor(n_estimators=50)

In [16]:
randregrssor.fit(X_train,Y_trian)

RandomForestRegressor(n_estimators=50)

In [17]:
predictions = randregrssor.predict(X_test)

In [18]:
errors = abs(predictions - Y_test)

In [19]:
print('Mean Absolute Error:', round(np.mean(errors), 2), 'degrees.')

Mean Absolute Error: 0.88 degrees.


In [20]:

# Calculate mean absolute percentage error (MAPE)
mape = 100 * (errors / Y_test)
# Calculate and display accuracy
accuracy = 100 - np.mean(mape)
print('Accuracy:', round(accuracy, 2), '%.')

Accuracy: 65.2 %.


### Decision Tree


In [21]:
dr=DecisionTreeRegressor()

In [22]:
dr.fit(X_train,Y_trian)

DecisionTreeRegressor()

In [23]:
predictions = dr.predict(X_test)

In [24]:
errors = abs(predictions - Y_test)

In [25]:
print('Mean Absolute Error:', round(np.mean(errors), 2), 'degrees.')

Mean Absolute Error: 1.05 degrees.


In [26]:

# Calculate mean absolute percentage error (MAPE)
mape = 100 * (errors / Y_test)
# Calculate and display accuracy
accuracy = 100 - np.mean(mape)
print('Accuracy:', round(accuracy, 2), '%.')

Accuracy: 59.93 %.


### Support Vector Machine

In [27]:
svm=SVR()

In [28]:
svm.fit(X_train,Y_trian)

SVR()

In [29]:
predictions = svm.predict(X_test)

In [30]:
errors = abs(predictions - Y_test)
print('Mean Absolute Error:', round(np.mean(errors), 2), 'degrees.')
# Calculate mean absolute percentage error (MAPE)
mape = 100 * (errors / Y_test)
# Calculate and display accuracy
accuracy = 100 - np.mean(mape)
print('Accuracy:', round(accuracy, 2), '%.')

Mean Absolute Error: 0.87 degrees.
Accuracy: 61.22 %.


### XG Boost

In [32]:
from xgboost import XGBRegressor

In [33]:
xgboost=XGBRegressor(n_estimators=50)

In [34]:
xgboost.fit(X_train,Y_trian)

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
             importance_type='gain', interaction_constraints='',
             learning_rate=0.300000012, max_delta_step=0, max_depth=6,
             min_child_weight=1, missing=nan, monotone_constraints='()',
             n_estimators=50, n_jobs=16, num_parallel_tree=1, random_state=0,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
             tree_method='exact', validate_parameters=1, verbosity=None)

In [35]:
predictions=xgboost.predict(X_test)

In [36]:
errors = abs(predictions - Y_test)
print('Mean Absolute Error:', round(np.mean(errors), 2), 'degrees.')
# Calculate mean absolute percentage error (MAPE)
mape = 100 * (errors / Y_test)
# Calculate and display accuracy
accuracy = 100 - np.mean(mape)
print('Accuracy:', round(accuracy, 2), '%.')

Mean Absolute Error: 0.79 degrees.
Accuracy: 67.39 %.


In [37]:
from keras.callbacks import ModelCheckpoint
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten

model = Sequential()

# The Input Layer :
model.add(Dense(128, kernel_initializer='normal',input_dim = X_train.shape[1], activation='relu'))

# The Hidden Layers :
model.add(Dense(256, kernel_initializer='normal',activation='relu'))
model.add(Dense(256, kernel_initializer='normal',activation='relu'))
model.add(Dense(256, kernel_initializer='normal',activation='relu'))
model.add(Dense(256, kernel_initializer='normal',activation='relu'))
model.add(Dense(256, kernel_initializer='normal',activation='relu'))

# The Output Layer :
model.add(Dense(1, kernel_initializer='normal',activation='linear'))

# Compile the network :
model.compile(loss='mean_absolute_error', optimizer='adam', metrics=['mean_absolute_error'])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 128)               384       
_________________________________________________________________
dense_1 (Dense)              (None, 256)               33024     
_________________________________________________________________
dense_2 (Dense)              (None, 256)               65792     
_________________________________________________________________
dense_3 (Dense)              (None, 256)               65792     
_________________________________________________________________
dense_4 (Dense)              (None, 256)               65792     
_________________________________________________________________
dense_5 (Dense)              (None, 256)               65792     
_________________________________________________________________
dense_6 (Dense)              (None, 1)                 2

In [38]:
model.fit(X_train, Y_trian, epochs=10, batch_size=32, validation_split = 0.2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x15ac5fbc688>

In [39]:
predictions = model.predict(X_test)

In [40]:
errors = abs(predictions - Y_test)
print('Mean Absolute Error:', round(np.mean(errors), 2), 'degrees.')
# Calculate mean absolute percentage error (MAPE)
mape = 100 * (errors / Y_test)
# Calculate and display accuracy
accuracy = 100 - np.mean(mape)
print('Accuracy:', round(accuracy, 2), '%.')

Mean Absolute Error: 0.92 degrees.
Accuracy: 58.48 %.
