In [4]:
import pandas as pd
import numpy as np
import requests
import os
from dotenv import load_dotenv
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import plotly.graph_objects as go

# --- 1. Load Secrets and Datasets ---
load_dotenv() # Load variables from .env file
# bearer_token = os.getenv("BEARER_TOKEN") # You can now use this for API calls

try:
    df_perf = pd.read_csv('Full Dataset (league standings with players stats).csv')
    df_injuries = pd.read_csv('James_Milner_injuries.csv')
    df_sentiment = pd.read_csv('James_Milner_recent_sentiment.csv') # Load recent sentiment
    player_id = "3333" # James Milner
    url = f"https://www.transfermarkt.com/ceapi/marketValueDevelopment/graph/{player_id}"
    headers = {'User-Agent': 'Mozilla/5.0'}
    response = requests.get(url, headers=headers)
    df_value = pd.DataFrame(response.json().get('list', []))
    df_value = df_value[['datum_mw', 'y']].rename(columns={'datum_mw': 'date', 'y': 'market_value_eur'})
    df_value['date'] = pd.to_datetime(df_value['date'], format='%d.%m.%Y')
    print("✅ Successfully loaded all datasets.")
except Exception as e:
    print(f"❌ Error loading data: {e}")
    exit()

# --- 2. Create the Master Time-Series (As before) ---
df_perf = df_perf.loc[:, ~df_perf.columns.str.contains('^Unnamed')]
df_perf['fullname'] = df_perf['firstname'] + ' ' + df_perf['lastname']
player_name = "James Philip Milner"
df_player_perf = df_perf[df_perf['fullname'] == player_name].copy()
df_player_perf['season_year'] = df_player_perf['season'].astype(str).str.split('/').str[0]
df_player_perf['date'] = pd.to_datetime(df_player_perf['season_year'] + '-06-30', format='%y-%m-%d', errors='coerce')
df_player_perf.dropna(subset=['date'], inplace=True)
df_injuries['start_date'] = pd.to_datetime(df_injuries['start_date'], errors='coerce', dayfirst=True)
df_injuries['end_date'] = pd.to_datetime(df_injuries['end_date'], errors='coerce', dayfirst=True)
df_injuries.dropna(subset=['start_date', 'end_date'], inplace=True)
df_injuries['days_missed'] = pd.to_numeric(df_injuries['days_missed'], errors='coerce').fillna(0)
start_date = df_player_perf['date'].min()
end_date = pd.to_datetime('today')
quarterly_dates = pd.date_range(start=start_date, end=end_date, freq='QE')
df_master = pd.DataFrame(index=quarterly_dates)
df_master = pd.merge_asof(df_master.sort_index(), df_value.sort_values('date'), left_index=True, right_on='date', direction='nearest')
df_master.set_index('date', inplace=True); df_master['market_value_eur'] = df_master['market_value_eur'].ffill()
df_player_perf.set_index('date', inplace=True)
df_master = df_master.join(df_player_perf[['totalGoals', 'goalAssists', 'totalShots']]); df_master[['totalGoals', 'goalAssists', 'totalShots']] = df_master[['totalGoals', 'goalAssists', 'totalShots']].ffill()
df_master['days_injured'] = 0
for index, row in df_master.iterrows():
    quarter_start = row.name - pd.DateOffset(months=3); quarter_end = row.name
    overlapping_injuries = df_injuries[(df_injuries['start_date'] <= quarter_end) & (df_injuries['end_date'] >= quarter_start)]
    total_days_missed = 0
    if not overlapping_injuries.empty:
        for _, injury_row in overlapping_injuries.iterrows():
            overlap_start = max(quarter_start, injury_row['start_date']); overlap_end = min(quarter_end, injury_row['end_date'])
            total_days_missed += (overlap_end - overlap_start).days if (overlap_end - overlap_start).days > 0 else 0
    df_master.loc[index, 'days_injured'] = total_days_missed
df_master.fillna(0, inplace=True)

# --- 3. Train the Final LSTM Model (As before) ---
data_to_train = df_master[['totalGoals', 'goalAssists', 'days_injured', 'market_value_eur']].copy()
sequence_length = 4
scaler = MinMaxScaler(feature_range=(0, 1)); scaled_data = scaler.fit_transform(data_to_train)
X, y = [], [];
for i in range(sequence_length, len(scaled_data)):
    X.append(scaled_data[i-sequence_length:i, :]); y.append(scaled_data[i, -1])
X, y = np.array(X), np.array(y)
model = Sequential([LSTM(units=50, return_sequences=True, input_shape=(X.shape[1], X.shape[2])), LSTM(units=50), Dense(units=1)])
model.compile(optimizer='adam', loss='mean_squared_error'); print("\nTraining final transfer value prediction model...");
model.fit(X, y, epochs=100, batch_size=4, verbose=0); print("✅ Final model trained.")

# --- 4. NEW: Forecast into the Future (to 2026) ---
n_future_steps = (2026 - pd.to_datetime('today').year) * 4 # Number of quarters until end of 2026
future_predictions_scaled = []
current_sequence = X[-1].reshape(1, X.shape[1], X.shape[2]) # Start with the last known data

for _ in range(n_future_steps):
    next_pred_scaled = model.predict(current_sequence)
    future_predictions_scaled.append(next_pred_scaled[0][0])
    # Create a new sequence by removing the oldest step and adding the new prediction
    new_step = np.insert(current_sequence[0, -1, 1:], -1, next_pred_scaled[0][0]).reshape(1, 1, X.shape[2])
    current_sequence = np.append(current_sequence[:, 1:, :], new_step, axis=1)

# Inverse transform the future predictions
future_predictions = scaler.inverse_transform(np.concatenate((np.zeros((n_future_steps, 3)), np.array(future_predictions_scaled).reshape(-1, 1)), axis=1))[:, -1]
future_dates = pd.date_range(start=df_master.index[-1] + pd.DateOffset(months=3), periods=n_future_steps, freq='QE')

# --- 5. NEW: Calculate Recent Sentiment ---
average_sentiment = df_sentiment['sentiment_score'].mean()
sentiment_text = "Positive" if average_sentiment >= 0.05 else "Negative" if average_sentiment <= -0.05 else "Neutral"

# --- 6. Visualize with Future Forecast ---
fig = go.Figure()
fig.add_trace(go.Scatter(x=df_master.index, y=df_master['market_value_eur'], mode='lines', name='Actual Historical Value'))
fig.add_trace(go.Scatter(x=future_dates, y=future_predictions, mode='lines+markers', name='Forecasted Future Value', line=dict(dash='dash')))
fig.update_layout(title=f"Market Value Forecast for {player_name} to 2026", xaxis_title="Date", yaxis_title="Market Value (EUR)")
fig.show()

print("\n--- 🚀 Final Forecast ---")
print(f"The model forecasts {player_name}'s market value in late 2026 to be around: €{future_predictions[-1]:,.0f}")
print(f"The player's current social media sentiment is: {sentiment_text} (Score: {average_sentiment:.2f})")

✅ Successfully loaded all datasets.



Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.




Training final transfer value prediction model...
✅ Final model trained.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 146ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step



--- 🚀 Final Forecast ---
The model forecasts James Philip Milner's market value in late 2026 to be around: €1,616,205
The player's current social media sentiment is: Positive (Score: 0.13)
