In [55]:
import pandas as pd
import os
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [56]:
# Columns present in the raw CSV that are discarded before modelling.
unused_columns = [
    'chipsettime', 'cellid', 'gpstime', 'rnti', 'throughput',
    'rb0', 'rb1', 'caindex', 'scc', 'rsrq', 'mcs0', 'mcs1', 'operator',
]

# Read the merged CSV (index_col=False: never use the first column as the
# index) and remove the unused columns in a single chained expression.
csv_path = os.path.join('data', 'merged_download.csv')
df = pd.read_csv(csv_path, index_col=False).drop(columns=unused_columns)

# Report (rows, columns) of the frame that the later cells work on.
print(df.shape)

(256800, 12)


In [57]:
# Reposition the 'speed' column so that it is the last column of the frame:
# take the column, drop it, then re-append it at the end.
speed_col = df['speed']
df = df.drop(columns='speed').assign(speed=speed_col)

In [58]:
# Target is the 'speed' column; every remaining column is used as a feature.
y = df['speed']
X = df.drop(columns='speed')

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible.
# NOTE(review): this is a row-level random split. If consecutive rows are
# samples from the same measurement run, near-duplicate rows may land in both
# train and test and inflate the scores - confirm against the data source.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [59]:
# Train a 100-tree random forest (seeded for reproducibility) on the training
# split; `fit` returns the estimator itself, so construction and fitting chain.
rf_regressor = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Predictions for the held-out rows, scored in the next cell.
y_pred = rf_regressor.predict(X_test)

In [60]:
# Score the held-out predictions with the usual regression metrics.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
# RMSE is taken as the square root of the MSE computed above instead of calling
# mean_squared_error(..., squared=False): that keyword was deprecated in
# scikit-learn 1.4 and removed in 1.6, where it raises a TypeError. For a single
# target column the two are numerically the same, and this avoids a second pass
# over the data.
rmse = mse ** 0.5
r2 = r2_score(y_test, y_pred)

In [61]:
# Print each metric on its own line as "<NAME>: <value>" with two decimals.
for label, value in (('MAE', mae), ('MSE', mse), ('RMSE', rmse), ('R2', r2)):
    print(f'{label}: {value:.2f}')

MAE: 0.03
MSE: 0.05
RMSE: 0.22
R2: 1.00
