In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import plotly.graph_objects as go
import os

# --- Data Preparation and Model Training (from previous weeks) ---
# Note: This is included to ensure all variables are defined.
df = pd.read_csv(r'C:\Users\M.ANTONY ROJES\Downloads\Infosys\data\feature_engineered\player_features_model_all_imputed.csv')
features = ['passes_attempted', 'expected_goals', 'goals', 'assists', 'injury_count', 'total_days_out', 'avg_market_value']
target = 'avg_market_value'

scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(df[features])

def create_sequences(data, n_steps):
    X, y = [], []
    for i in range(len(data)):
        end_ix = i + n_steps
        if end_ix > len(data) - 1:
            break
        seq_x, seq_y = data[i:end_ix, :], data[end_ix, -1]
        X.append(seq_x)
        y.append(seq_y)
    return np.array(X), np.array(y)

n_steps = 3
X, y = create_sequences(scaled_data, n_steps)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(n_steps, X.shape[2])))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(X_train, y_train, epochs=50, validation_data=(X_test, y_test), verbose=0)
print("LSTM model has been successfully rebuilt and trained.")

# --- Corrected Week 8 Code: Final Predictions and Visualization ---
# Make final predictions on the test set
final_predictions_scaled = model.predict(X_test).flatten()

# Reshape y_test and final_predictions_scaled to a 2D array
y_test_2d = y_test.reshape(-1, 1)
final_predictions_2d = final_predictions_scaled.reshape(-1, 1)

# Create dummy arrays with the correct number of features (7) for inverse transformation
dummy_array_test = np.zeros(shape=(len(y_test_2d), 7))
dummy_array_preds = np.zeros(shape=(len(final_predictions_2d), 7))

# Place the scaled target values in the last column
dummy_array_test[:, -1] = y_test_2d.flatten()
dummy_array_preds[:, -1] = final_predictions_2d.flatten()

# Inverse transform the dummy arrays to get the original scale
y_test_original = scaler.inverse_transform(dummy_array_test)[:, -1]
final_predictions_original = scaler.inverse_transform(dummy_array_preds)[:, -1]

# Display a comparison of a few actual vs. predicted values
comparison_df = pd.DataFrame({'Actual Market Value': y_test_original, 'Predicted Market Value': final_predictions_original})
print(comparison_df.sample(5))

# Create traces for the actual and predicted values
actual_trace = go.Scatter(
    x=np.arange(len(y_test_original)),
    y=y_test_original,
    mode='lines',
    name='Actual Market Value'
)

predicted_trace = go.Scatter(
    x=np.arange(len(final_predictions_original)),
    y=final_predictions_original,
    mode='lines',
    name='Predicted Market Value'
)

# Create the figure and add traces
fig = go.Figure(data=[actual_trace, predicted_trace])

# Add titles and labels
fig.update_layout(
    title='Actual vs. Predicted Player Market Values (Test Set)',
    xaxis_title='Player Index',
    yaxis_title='Market Value (in millions)'
)

# Save the plot as an HTML file
fig.write_html("market_value_predictions.html")

print("\n✅ Interactive visualization saved as 'market_value_predictions.html'")

  super().__init__(**kwargs)


LSTM model has been successfully rebuilt and trained.
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step
     Actual Market Value  Predicted Market Value
154            -0.314860                0.266712
5              -0.217042               -0.023035
118             2.350666                0.014438
38              0.027501                0.007141
53             -0.241497                0.065765

✅ Interactive visualization saved as 'market_value_predictions.html'


In [2]:
# === WEEK 8 VERIFICATION ===

# 1. Check model training
try:
    print("Model Summary:")
    model.summary()
except Exception as e:
    print("Model not defined or not trained:", e)

# 2. Verify predictions comparison DataFrame
try:
    print("\n[Comparison of Actual vs Predicted Market Values]")
    print(comparison_df.head(10))
    print(f"Total predictions: {len(comparison_df)}")
except Exception as e:
    print("Comparison DataFrame not found:", e)

# 3. Check visualization file
import os
if os.path.exists("market_value_predictions.html"):
    print("\n✅ Visualization file exists: market_value_predictions.html")
else:
    print("\n❌ Visualization file not found.")


Model Summary:



[Comparison of Actual vs Predicted Market Values]
   Actual Market Value  Predicted Market Value
0            -0.229269                0.031699
1            -0.588749               -0.020996
2            -0.314860                0.000883
3            -0.314860               -0.088239
4            -0.510495                0.284969
5            -0.217042               -0.023035
6             1.250219               -0.020218
7            -0.510495               -0.013901
8            -0.314860               -0.042291
9            -0.481149               -0.084505
Total predictions: 229

✅ Visualization file exists: market_value_predictions.html


In [None]:
# --- Reload Week 3 outputs ---
events_df = pd.read_csv("events_week3.csv")

# --- Reload Week 4 outputs ---
summary = pd.read_csv("player_summary_week4.csv")

# --- Reload Week 5 outputs ---
sentiment_df = pd.read_csv("sentiment_cleaned.csv")



Columns (81,83,93,94,95,96,97,98,99,100,101,103,104,106,112,114,115,116,117,118,119,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137) have mixed types. Specify dtype option on import or set low_memory=False.

