In [35]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from scipy.stats import kurtosis, skew
import numpy as np




df = pd.read_csv("vgsales.csv")
df = df.dropna(subset=['Year'])


num_rows_to_use = 200
columns_to_use = ['Year', 'NA_Sales', 'EU_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales']
df = df.loc[:num_rows_to_use - 1, columns_to_use]


features = ['Year', 'NA_Sales']
X = df[features]
y = df['Global_Sales']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


dt_model = DecisionTreeRegressor()
dt_model.fit(X_train, y_train)
dt_predictions = dt_model.predict(X_test)


print("Maximum Aptitude (Decision Tree):", np.max(dt_predictions))
print("Minimum Aptitude (Decision Tree):", np.min(dt_predictions))
print("Energy (Decision Tree):", np.sum(dt_predictions**2))
print("Skewness (Decision Tree):", skew(dt_predictions))
print("Kurtosis:", kurtosis(dt_predictions))
print("Medium Kurtosis (Decision Tree):", np.median(kurtosis(dt_predictions)))

print("------------------------------------------------")

knn_model = KNeighborsRegressor()
knn_model.fit(X_train, y_train)
knn_predictions = knn_model.predict(X_test)

print("Maximum Aptitude (KNN):", np.max(knn_predictions))
print("Minimum Aptitude (KNN):", np.min(knn_predictions))
print("Energy (KNN):", np.sum(knn_predictions**2))
print("Skewness (KNN):", skew(knn_predictions))
print("Kurtosis:", kurtosis(knn_predictions))
print("Medium Kurtosis (KNN):", np.median(kurtosis(knn_predictions)))



Maximum Aptitude (Decision Tree): 40.24
Minimum Aptitude (Decision Tree): 5.08
Energy (Decision Tree): 5926.898700000001
Skewness (Decision Tree): 2.7458098998030067
Kurtosis: 7.532787971898699
Medium Kurtosis (Decision Tree): 7.532787971898699
------------------------------------------------
Maximum Aptitude (KNN): 28.22
Minimum Aptitude (KNN): 5.384
Energy (KNN): 4343.666152
Skewness (KNN): 2.6846565333656347
Kurtosis: 7.1299463164484855
Medium Kurtosis (KNN): 7.1299463164484855


In [32]:

print("----------------other matrics---------------- ")

print("Decision Tree Metrics:")
print("MAE:", mean_absolute_error(y_test, dt_predictions))
print("MSE:", mean_squared_error(y_test, dt_predictions))
print("R-squared:", r2_score(y_test, dt_predictions))

print("\nKNN Metrics:")
print("MAE:", mean_absolute_error(y_test, knn_predictions))
print("MSE:", mean_squared_error(y_test, knn_predictions))
print("R-squared:", r2_score(y_test, knn_predictions))


----------------other matrics---------------- 
Decision Tree Metrics:
MAE: 2.9225000000000003
MSE: 16.943910000000002
R-squared: 0.36697265392520495

KNN Metrics:
MAE: 2.057
MSE: 7.5970038
R-squared: 0.7161746519171708


In [36]:
df.describe

<bound method NDFrame.describe of        Year  NA_Sales  EU_Sales  JP_Sales  Other_Sales  Global_Sales
0    2006.0     41.49     29.02      3.77         8.46         82.74
1    1985.0     29.08      3.58      6.81         0.77         40.24
2    2008.0     15.85     12.88      3.79         3.31         35.82
3    2009.0     15.75     11.01      3.28         2.96         33.00
4    1996.0     11.27      8.89     10.22         1.00         31.37
..      ...       ...       ...       ...          ...           ...
195  2006.0      3.81      0.63      0.00         0.68          5.12
196  2009.0      1.96      1.43      1.08         0.65          5.11
197  2014.0      2.66      2.01      0.00         0.41          5.08
198  2006.0      1.70      2.02      0.16         1.21          5.08
199  2010.0      0.60      3.29      0.06         1.13          5.08

[199 rows x 6 columns]>