In [18]:
import pandas as pd
# Quick EDA pass over the model-ready dataset: print the loaded table, then
# rank cars for 2024 by registrations and by peak cumulative sentiment.

df = pd.read_csv("ready_for_lightgbm_forecasting.csv")
print(df)

# Parse `month` into datetimes when the column is present; values that cannot
# be parsed are coerced to NaT instead of raising.
if "month" in df:
    df["month"] = pd.to_datetime(df["month"], errors="coerce")

# Keep only the 2024 rows, selected through the `year` column.
df_2024 = df.loc[df["year"].eq(2024)].copy()

# One row per car: summed registrations and the largest cumulative
# like-weighted sentiment value found among that car's 2024 rows.
car_2024 = df_2024.groupby("Car Model", as_index=False).agg(
    registrations_2024=pd.NamedAgg(column="registrations", aggfunc="sum"),
    max_cum_sent_2024=pd.NamedAgg(
        column="cumulative_like_weighted_sentiment", aggfunc="max"
    ),
)

# Number of cars kept in each ranking table.
TOP_N = 10

# Ranking 1: cars ordered by 2024 registrations, highest first.
top_reg_2024 = (
    car_2024.sort_values(by="registrations_2024", ascending=False)
    .iloc[:TOP_N]
    .reset_index(drop=True)
)

# Ranking 2: cars ordered by their maximum cumulative sentiment in 2024,
# highest first.
top_cum_sent_2024 = (
    car_2024.sort_values(by="max_cum_sent_2024", ascending=False)
    .iloc[:TOP_N]
    .reset_index(drop=True)
)

# Each call prints the heading and the table on separate lines.
print(
    "\n=== Top cars by registrations in 2024 (with max cumulative sentiment in 2024) ===",
    top_reg_2024,
    sep="\n",
)

print(
    "\n=== Top cars by max cumulative sentiment in 2024 (with registrations in 2024) ===",
    top_cum_sent_2024,
    sep="\n",
)


          Car Model       month  registrations  like_weighted_sentiment  \
0    AUDI Q4 E-TRON  2021-12-01           1202                     -5.0   
1    AUDI Q4 E-TRON  2022-01-01            119                      1.0   
2    AUDI Q4 E-TRON  2022-02-01            310                      5.0   
3    AUDI Q4 E-TRON  2022-03-01            462                     -4.0   
4    AUDI Q4 E-TRON  2022-04-01            236                     -8.0   
..              ...         ...            ...                      ...   
882      VOLVO XC60  2024-07-01            661                     -1.0   
883      VOLVO XC60  2024-08-01            543                      9.0   
884      VOLVO XC60  2024-09-01            406                      6.0   
885      VOLVO XC60  2024-10-01            545                     -1.0   
886      VOLVO XC60  2024-11-01            682                     -2.0   

     cumulative_like_weighted_sentiment  lag_1_registrations  lag_1_sentiment  \
0                 

In [14]:
import pandas as pd

df = pd.read_csv("ready_for_lightgbm_forecasting.csv")

# Parse `month` into datetimes; values that cannot be parsed become NaT.
df["month"] = pd.to_datetime(df["month"], errors="coerce")

# Restrict to rows whose `year` lies in 2021..2024 (both ends included).
df_period = df[df["year"].between(2021, 2024)].copy()

# One row per car: registrations summed over the period and the largest
# cumulative like-weighted sentiment value seen in the period.
car_summary = df_period.groupby("Car Model", as_index=False).agg(
    total_registrations_2021_2024=pd.NamedAgg(
        column="registrations", aggfunc="sum"
    ),
    max_cumulative_sentiment_2021_2024=pd.NamedAgg(
        column="cumulative_like_weighted_sentiment", aggfunc="max"
    ),
)

# Number of cars kept in each ranking table.
TOP_N = 10

# Table A: cars ordered by total registrations, highest first.
top_by_reg = (
    car_summary.sort_values(by="total_registrations_2021_2024", ascending=False)
    .iloc[:TOP_N]
    .reset_index(drop=True)
)

# Table B: cars ordered by maximum cumulative sentiment, highest first.
top_by_sent = (
    car_summary.sort_values(
        by="max_cumulative_sentiment_2021_2024", ascending=False
    )
    .iloc[:TOP_N]
    .reset_index(drop=True)
)

# Each call prints the heading and the table on separate lines.
print("\n=== Top cars by TOTAL registrations (2021–2024) ===", top_by_reg, sep="\n")

print("\n=== Top cars by MAX cumulative sentiment (2021–2024) ===", top_by_sent, sep="\n")


=== Top cars by TOTAL registrations (2021–2024) ===
            Car Model  total_registrations_2021_2024  \
0       TESLA MODEL Y                          34732   
1         PEUGEOT 208                          31171   
2            KIA NIRO                          26188   
3     VOLKSWAGEN POLO                          23939   
4          VOLVO XC40                          22098   
5  TOYOTA YARIS CROSS                          21710   
6        TOYOTA YARIS                          20616   
7      TOYOTA COROLLA                          19043   
8        PEUGEOT 2008                          18018   
9            FIAT 500                          16377   

   max_cumulative_sentiment_2021_2024  
0                               542.0  
1                              3676.0  
2                              -505.0  
3                              1744.0  
4                             -2518.0  
5                              1609.0  
6                              -385.0  
7         