In [2]:
import pandas as pd
import numpy as np
from flask import Flask, jsonify, render_template
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

In [3]:
# Load your dataset into a DataFrame
df = pd.read_csv('vgsales.csv')



# Encode categorical variables using one-hot encoding
df = pd.get_dummies(df, columns=['Platform', 'Genre', 'Publisher'])
print (df["Year"].min())

# Create a feature for total sales for the previous NINE years
df['TotalSalesPrev9Years'] = df[df['Year'].isin(list(range(2015, 2023)))]['EU_Sales'].sum()


print(df[df['Year'].isin(list(range(2015, 2023)))]['EU_Sales'].sum())
print()
print(df)

2015
2150.43

        Rank                                              Name  Year  A_Sales  \
0          1                                        Wii Sports  2016    41.49   
1          2                                 Super Mario Bros.  2016    29.08   
2          3                                    Mario Kart Wii  2017    15.85   
3          4                                 Wii Sports Resort  2019    15.75   
4          5                          Pokemon Red/Pokemon Blue  2023    11.27   
...      ...                                               ...   ...      ...   
16593  16596                Woody Woodpecker in Crazy Castle 5  2017     0.01   
16594  16597                     Men in Black II: Alien Escape  2018     0.01   
16595  16598  SCORE International Baja 1000: The Official Game  2019     0.00   
16596  16599                                        Know How 2  2016     0.00   
16597  16600                                  Spirits & Spells  2017     0.01   

       EU_Sal

In [18]:

app = Flask(__name__)

@app.route('/')
def index():
    return render_template('index.html')

@app.route('/get_data', methods=['GET'])
def get_data():
    # Load your dataset into a DataFrame
    df = pd.read_csv('vgsales.csv')

    # Encode categorical variables using one-hot encoding
    df = pd.get_dummies(df, columns=['Platform', 'Genre', 'Publisher'])

    # Calculate your desired results
    total_sales = df[df['Year'].isin(list(range(2015, 2023)))]['EU_Sales'].sum()

    # Create a dictionary to hold the results
    result = {"total_sales": total_sales, "data_frame": df.to_dict(orient='records')}

    return jsonify(result)

app.run(debug=True)

 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
 * Restarting with stat


SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [None]:
X = df[['Platform', 'Year', 'Genre', 'Publisher', 'TotalSalesPrev9Years']]
y = df['Global_Sales']


In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:
model = LinearRegression()
model.fit(X_train, y_train)


In [None]:
y_pred = model.predict(X_test)


In [None]:
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Mean Squared Error: {mse}")
print(f"R-squared: {r2}")


In [None]:
plt.scatter(y_test, y_pred)
plt.xlabel("Actual Sales")
plt.ylabel("Predicted Sales")
plt.title("Actual vs. Predicted Sales")
plt.show()
