# Importing Dependencies

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import xgboost as xg
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.svm import SVR
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import warnings
warnings.filterwarnings('ignore')

# Getting Data Information

In [None]:
dataset = pd.read_csv('/kaggle/input/nba2k20-player-dataset/nba2k-full.csv')

In [None]:
dataset.head()

In [None]:
dataset.shape

In [None]:
dataset.info()

In [None]:
dataset.describe()

# Handling Attributes

In [None]:
dataset["team"].unique()

In [None]:
dataset.isnull().sum()

In [None]:
# Remove the columns that play no part in the rest of the analysis,
# then preview the slimmed-down frame.
unused_columns = ['full_name','b_day','weight','height','college','version']
dataset = dataset.drop(columns=unused_columns)
dataset.head()

In [None]:
# Drop every row that still contains a missing value, then re-count the
# nulls per column to confirm none remain.
dataset=dataset.dropna()
dataset.isnull().sum()

In [None]:
dataset.shape

# Feature Engineering

In [None]:
def remove(value):
    """Drop the first character of ``value`` and return the rest as an int.

    Raises ``ValueError`` if the remainder is not a valid integer literal.
    """
    return int(value[1:])

# Strip the leading character from each jersey value and store the rest as int
# (presumably a '#' prefix -- confirm against the raw CSV).
dataset['jersey'] = dataset['jersey'].apply(remove)

In [None]:
# Strip the leading character from each salary value and store the rest as int
# (presumably a currency symbol -- confirm against the raw CSV).
dataset['salary'] = dataset['salary'].apply(remove)
dataset.head()

In [None]:
dataset['country'].value_counts()

In [None]:
def others(value):
    """Return ``value`` for USA, Canada or Australia; otherwise ``'Others'``."""
    kept_countries = ('USA', 'Canada', 'Australia')
    return value if value in kept_countries else 'Others'
# Collapse every country other than USA, Canada and Australia into 'Others'.
dataset['country'] = dataset['country'].apply(others)

In [None]:
dataset['country'].value_counts()

In [None]:
dataset['position'].value_counts()

In [None]:
def removeundraft(value):
    """Convert a draft field to int, mapping ``'Undrafted'`` to ``None``."""
    return None if value == 'Undrafted' else int(value)

In [None]:
# Convert draft round and draft pick to integers; 'Undrafted' entries are
# mapped to None by removeundraft and so become missing values.
dataset['draft_round'] = dataset['draft_round'].apply(removeundraft)
dataset['draft_peak'] = dataset['draft_peak'].apply(removeundraft)

In [None]:
dataset.isnull().sum()

In [None]:
# Drop rows with missing values. Draft fields that were 'Undrafted' are
# missing at this point, so those players are removed here.
dataset = dataset.dropna()

In [None]:
dataset.isnull().sum()

In [None]:
dataset.shape

# Analysing Correlation

In [None]:
# Correlate numeric columns only: team, position and country are still
# strings here, and pandas >= 2.0 raises on non-numeric columns in corr().
dataset.select_dtypes(include='number').corr()

In [None]:
# Heatmap of pairwise correlations between the numeric columns. The string
# columns (team, position, country) are excluded because pandas >= 2.0
# raises when corr() meets non-numeric data.
sns.heatmap(dataset.select_dtypes(include='number').corr())

# Data Visualising and Analysing

In [None]:
# pairplot is a figure-level function that builds its own figure, so a
# preceding plt.figure(...) call only produced an empty extra figure.
sns.pairplot(dataset)
plt.show()

In [None]:
# Side-by-side box plots: salary on the left axis, rating on the right.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(8, 6))
for axis, column in zip(axes, ['salary', 'rating']):
    sns.boxplot(dataset[column], ax=axis)
plt.tight_layout()
plt.show()

In [None]:
# Rating vs. salary with a fitted regression line and marginal distributions.
# jointplot creates its own figure, so the earlier plt.figure(...) call only
# left an empty figure behind. The frame is passed by keyword because
# jointplot's parameters are keyword-only in some seaborn releases.
sns.jointplot(data=dataset, x='rating', y='salary', kind='reg')

In [None]:
# Scatter of the salary column on its own.
# NOTE(review): with a single Series and no x/y, seaborn presumably plots the
# values against the row index -- confirm this is the intended view.
sns.scatterplot(dataset['salary'])

# Label Encoding

In [None]:
# Fit a separate LabelEncoder per categorical column. Re-fitting one shared
# encoder overwrites its learned classes each time, so the integer codes for
# position and country could no longer be mapped back to their labels.
encoders = {}
for column in ['position', 'country', 'team']:
    encoders[column] = LabelEncoder()
    dataset[column] = encoders[column].fit_transform(dataset[column])
# Keep `le` bound to the encoder fitted last (team), which is what the name
# referred to before, in case later cells use it.
le = encoders['team']
dataset.head()

# Dividing Target and Attributes

In [None]:
# Target vector: the salary column.
Y = dataset['salary']
# Feature matrix: every column except salary.
X = dataset.drop(columns=['salary'])

# Using Train Test Split

In [None]:
# Hold out 20% of the rows for evaluation. A fixed random_state makes the
# split -- and therefore every metric reported below -- reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

In [None]:
X_train.shape

In [None]:
Y_train.shape

# Model Building and Predictions

In [None]:
# Candidate regressors keyed by display name. The randomised ensembles get a
# fixed seed so repeated runs give the same scores; the other estimators keep
# their default settings.
model = {
    'Random Forest Regressor': RandomForestRegressor(random_state=42),
    'Gradient Boost Regressor': GradientBoostingRegressor(random_state=42),
    'XGBoost': xg.XGBRegressor(random_state=42),
    'XGRF Regressor': xg.XGBRFRegressor(random_state=42),
    'Support Vector regressor': SVR(),
    'Lasso Reg': Lasso(),
    'Ridge Reg': Ridge(),
    'Linear Regression': LinearRegression(),
    'Extra Trees Regressor': ExtraTreesRegressor(random_state=42),
}

In [None]:
# Fit each candidate on the training split and keep its test-set predictions,
# keyed by model name.
pred = {}
# The loop variable must not be called `model`: that rebinds the `model` dict
# to the last estimator, so re-running this cell fails on `.items()`.
for name, estimator in model.items():
    estimator.fit(X_train, Y_train)
    pred[name] = estimator.predict(X_test)

# Analysing Predictions

In [None]:
# R2 score per model name, collected for later comparison.
acc = {}
for name, y_pred in pred.items():
    # Error metrics on the held-out test split.
    mse = mean_squared_error(Y_test, y_pred)
    mae = mean_absolute_error(Y_test, y_pred)
    r2 = r2_score(Y_test, y_pred)
    acc[name] = r2
    print(f"Results for {name} : ")
    print (f"Mean Square Error : {mse}")
    print (f"Mean Absolute Error : {mae}")
    print(f"R2 Score : {r2}")
    plt.figure(figsize=(15, 6))

    # Left panel: actual vs. predicted values, plotted by test-row position.
    sample_positions = np.arange(len(Y_test))
    plt.subplot(1, 2, 1)
    plt.plot(sample_positions, Y_test, label='Actual Trend')
    plt.plot(sample_positions, y_pred, label='Predicted Trend')
    plt.xlabel('Data')
    plt.ylabel('Trend')
    plt.legend()
    plt.title('Actual vs. Predicted')

    # Right panel: histogram of residuals (actual minus predicted).
    residuals = Y_test - y_pred

    plt.subplot(1, 2, 2)
    plt.hist(residuals)
    # On a histogram the x-axis holds the residual values and the y-axis
    # their counts; the previous labels described a scatter-style plot.
    plt.xlabel('Residuals')
    plt.ylabel('Frequency')
    plt.title('Residual Plot')

    plt.tight_layout()
    plt.show()