In [127]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import numpy as np
import plotly.graph_objects as go
from datetime import timedelta
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report

In [107]:
# Prefix of the CSV files read from data/ (e.g. data/{INDEX}_trends.csv).
INDEX = "MS8"
# Ticker whose rows are kept from each loaded dataset.
TICKER = "GME"

In [120]:
# --- Google Trends ---
# Wide CSV (one column per ticker) reshaped to long form, then narrowed to TICKER.
trends_data = pd.read_csv(f"data/{INDEX}_trends.csv", index_col=False)
trends = (
    trends_data
    .melt(id_vars=["date"], var_name="ticker", value_name="trend")
    .loc[lambda long: long["ticker"] == TICKER]
    # NOTE: despite the "_pct" suffix this is a row-to-row difference, not a ratio.
    .assign(trend_pct=lambda sel: sel["trend"].diff())
)

# --- Short interest ---
# Already in long form: keep TICKER rows and add row-to-row relative changes.
shorts_data = pd.read_csv(f"data/{INDEX}_short_interest_W.csv", index_col=False)
shorts = (
    shorts_data
    .loc[shorts_data["ticker"] == TICKER]
    .assign(
        d2c_pct=lambda sel: sel["d2c"].pct_change(),
        shorts_pct=lambda sel: sel["shorts"].pct_change(),
        volume_pct=lambda sel: sel["volume"].pct_change(),
    )
)

# --- Prices, returns and rolling volatility ---
# Wide CSV keyed by "Date"; the key is renamed so all three frames share "date".
prices = (
    pd.read_csv(f"data/{INDEX}_prices.csv", index_col=False)
    .melt(id_vars=["Date"], var_name="ticker", value_name="prices")
    .rename(columns={"Date": "date"})
    .loc[lambda long: long["ticker"] == TICKER]
    .assign(
        returns=lambda sel: sel["prices"].pct_change(),
        # Standard deviation of returns over a 10-row rolling window.
        volatility=lambda sel: sel["returns"].rolling(window=10).std(),
        # NOTE: row-to-row difference of volatility, not a percentage change.
        volatility_pct=lambda sel: sel["volatility"].diff(),
    )
)

In [121]:
# Parse the "date" column of every frame into datetimes so they can be
# compared and used as merge keys.
for frame in (shorts, prices, trends):
    frame["date"] = pd.to_datetime(frame["date"])

# Keep only short-interest rows dated 2020-06-01 or later.
shorts = shorts.loc[shorts["date"] >= "2020-06-01"]

In [122]:
# Merge the three datasets, keeping one row per Google Trends observation.
#
# merge_asof requires the `on` key ("date") itself to be sorted in both frames.
# Sorting by ["ticker", "date"] only satisfies that while a single ticker is
# present, so the frames are sorted by "date" alone; `by="ticker"` still keeps
# matches within the same ticker. A stable sort preserves the order of equal dates.
merged = pd.merge_asof(
    trends.sort_values("date", kind="mergesort"),
    shorts.sort_values("date", kind="mergesort"),
    on="date",
    by="ticker",
    # "backward" picks the most recent short-interest row at or before the
    # trends date; it never looks forward in time.
    direction="backward",
    tolerance=pd.Timedelta("10D"),  # ignore matches more than 10 days old
)
df = pd.merge_asof(
    merged.sort_values("date", kind="mergesort"),
    prices.sort_values("date", kind="mergesort"),
    on="date",
    by="ticker",
    direction="backward",
    tolerance=pd.Timedelta("3D"),  # ignore price rows more than 3 days old
)

# Drop rows where any of the core level variables is missing
# (no match within tolerance, or warm-up of the return/volatility windows).
df = df.dropna(subset=["returns", "d2c", "shorts", "volume", "trend", "volatility"])

In [123]:
# Rolling means over the current row and up to `window - 1` previous rows;
# min_periods=1 lets the first rows average whatever is available.
window = 4
rolling_sources = {
    "rolling_price": "prices",
    "rolling_d2c": "d2c",
    "rolling_shorts": "shorts",
    "rolling_vola": "volatility",
    "rolling_volu": "volume",
}
for rolled_name, source_name in rolling_sources.items():
    df[rolled_name] = df[source_name].rolling(window=window, min_periods=1).mean()

# Boolean flags comparing each level with its own rolling mean.
cond_price_spike = df["prices"].gt(1.3 * df["rolling_price"])
cond_d2c_drop = df["d2c"].lt(0.9 * df["rolling_d2c"])
cond_shorts_drop = df["shorts"].lt(0.9 * df["rolling_shorts"])
cond_vola_spike = df["volatility"].gt(1.5 * df["rolling_vola"])
cond_volu_spike = df["volume"].gt(1.5 * df["rolling_volu"])

# The flags above are not used by the active label below. They belong to the
# alternative composite rule, currently disabled:
#   cond_price_spike & (cond_vola_spike | cond_volu_spike) & (cond_d2c_drop | cond_shorts_drop)

# Active label: 1 when the row's return exceeds 10%, otherwise 0.
df["squeeze_signal"] = df["returns"].gt(0.10).astype(int)

# Number of rows labelled as a squeeze.
print(df["squeeze_signal"].sum())

6


In [124]:
def normalize(series):
    """Linearly rescale ``series`` to the interval [-1, 1].

    The minimum maps to -1 and the maximum to 1. NaN entries stay NaN.

    Parameters
    ----------
    series : pandas.Series
        Numeric values to rescale.

    Returns
    -------
    pandas.Series
        Rescaled values with the same index. When the series has no spread
        (all non-missing values equal, empty, or all NaN) the min-max formula
        would divide by zero; in that case every non-missing value maps to
        0.0, the midpoint of the target interval, instead of NaN.
    """
    lowest = series.min()
    spread = series.max() - lowest
    # `not spread > 0` is also True when spread is NaN (empty / all-NaN input).
    if not spread > 0:
        return series * 0.0
    return 2 * ((series - lowest) / spread) - 1

fig = go.Figure()

# (column, legend label, drawn by default?). Every series is rescaled with
# normalize() so they share one axis; series marked False start hidden and
# can be toggled from the legend.
trace_specs = [
    ("returns", "Returns", True),
    ("prices", "Price", False),
    ("d2c", "Days to Cover", False),
    ("trend", "Google Trends", False),
    ("volatility", "Volatility", False),
    ("shorts", "Shorts", False),
    ("shorts_pct", "Shorts Pct", True),
    ("volume_pct", "Avg Daily Volume", False),
]
for column, label, shown in trace_specs:
    visibility = {} if shown else {"visible": "legendonly"}
    fig.add_trace(
        go.Scatter(x=df["date"], y=normalize(df[column]), name=label, **visibility)
    )

# Shade a one-week band starting at every date flagged as a squeeze.
squeeze_dates = df.loc[df["squeeze_signal"] == 1, "date"]
band_width = timedelta(weeks=1)
for start in squeeze_dates:
    fig.add_shape(
        type="rect",
        xref='x',
        yref='paper',  # paper coordinates: 0..1 spans the full plot height
        x0=start,
        x1=start + band_width,
        y0=0,
        y1=1,
        fillcolor="rgba(200,200,200,0.5)",
        line_width=0,
        layer="below"
    )

fig.update_layout(
    title=f"{TICKER}",
    hovermode="x unified",
    template="plotly_white"
)

fig.show()


In [125]:
# Lag every explanatory variable by one row, so each row carries the previous
# observation's level ("*_lag") and change ("*_lag_pct").
lag_sources = {
    "d2c_lag": "d2c",
    "d2c_lag_pct": "d2c_pct",
    "shorts_lag": "shorts",
    "shorts_lag_pct": "shorts_pct",
    "volu_lag": "volume",
    "volu_lag_pct": "volume_pct",
    "trend_lag": "trend",
    "trend_lag_pct": "trend_pct",
    "vola_lag": "volatility",
    "vola_lag_pct": "volatility_pct",
}
for lag_name, source_name in lag_sources.items():
    df[lag_name] = df[source_name].shift(1)

# Pearson correlation between returns and the lagged levels. It is stored in a
# variable and left as the cell's last expression so the notebook displays it
# (previously the matrix was computed mid-cell and silently discarded).
lag_corr = df[["returns", "d2c_lag", "trend_lag", "vola_lag", "shorts_lag", "volu_lag"]].corr(method="pearson")

# NOTE(review): this overwrites "returns" in place with its one-row lag, which
# re-aligns it with the lagged predictors. Re-running this cell shifts it by one
# more row each time. Confirm the shift is intended before using "returns" below.
df["returns"] = df["returns"].shift(1)

lag_corr

In [126]:
# Predictors of the logit model: lagged levels and lagged changes.
feature_cols = [
    "d2c_lag", "d2c_lag_pct",
    "trend_lag", "trend_lag_pct",
    "vola_lag", "vola_lag_pct",
    "volu_lag", "volu_lag_pct",
    "shorts_lag", "shorts_lag_pct",
]

# Build the estimation sample first: drop rows with missing values (from the
# shifts or from missing data). pct_change yields +/-inf when the previous value
# is 0, and StandardScaler rejects infinities, so those are treated as missing.
df_clean = (
    df[["squeeze_signal"] + feature_cols]
    .replace([np.inf, -np.inf], np.nan)
    .dropna()
)

# Standardise on the estimation sample only, so every predictor has exactly
# zero mean and unit variance over the rows the model sees. Fitting the scaler
# before dropping rows would include observations the model never uses.
# `df` itself is left unscaled, so re-running earlier cells is not affected.
# For an unpenalised logit with an intercept this only changes the scale of the
# coefficients, not the fitted probabilities.
scaler = StandardScaler()
df_clean[feature_cols] = scaler.fit_transform(df_clean[feature_cols])

# Design matrix (with intercept) and binary target.
X = sm.add_constant(df_clean[feature_cols])
y = df_clean["squeeze_signal"]

# Logit model fitted by maximum likelihood.
logit_model = sm.Logit(y, X)
result = logit_model.fit()

# Coefficient table and fit statistics.
print(result.summary())

Optimization terminated successfully.
         Current function value: 0.091931
         Iterations 10
                           Logit Regression Results                           
Dep. Variable:         squeeze_signal   No. Observations:                  187
Model:                          Logit   Df Residuals:                      176
Method:                           MLE   Df Model:                           10
Date:                Fri, 27 Jun 2025   Pseudo R-squ.:                  0.3522
Time:                        15:35:59   Log-Likelihood:                -17.191
converged:                       True   LL-Null:                       -26.539
Covariance Type:            nonrobust   LLR p-value:                   0.04431
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------
const             -4.9965      1.088     -4.590      0.000      -7.130      -2.863
d2c_lag         

In [128]:
# In-sample evaluation: the model is scored on the same rows it was fitted on.
y_true = y
predicted_prob = result.predict(X)
# Classify as a squeeze when the fitted probability exceeds 0.5.
y_pred = (predicted_prob > 0.5).astype(int)

# Rows are true classes and columns are predicted classes.
confusion = confusion_matrix(y_true, y_pred)
print(confusion)

[[181   0]
 [  4   2]]


In [129]:
# Per-class precision, recall and F1 for the in-sample predictions, 3 decimals.
print(
    "\nClassification Report:",
    classification_report(y_true, y_pred, digits=3),
    sep="\n",
)


Classification Report:
              precision    recall  f1-score   support

           0      0.978     1.000     0.989       181
           1      1.000     0.333     0.500         6

    accuracy                          0.979       187
   macro avg      0.989     0.667     0.745       187
weighted avg      0.979     0.979     0.973       187

