In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import load_model
import joblib

In [2]:
test_df = pd.read_csv("dataset/test.csv")
test_df.head()

Unnamed: 0,event_time,event_type,product_id,category_id,user_id,user_session
0,2025-06-28 10:09:58+00:00,ADD_CART,PROD_015000,CAT_00019,USER_109759,SESSION_164059
1,2025-06-25 11:57:50+00:00,ADD_CART,PROD_023887,CAT_00010,USER_010614,SESSION_109583
2,2025-06-30 14:34:20+00:00,ADD_CART,PROD_022673,CAT_00090,USER_041338,SESSION_171382
3,2025-06-30 22:12:18+00:00,ADD_CART,PROD_004664,CAT_00280,USER_015376,SESSION_137110
4,2025-06-26 16:55:18+00:00,ADD_CART,PROD_027815,CAT_00027,USER_054449,SESSION_146503


In [3]:
test_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 62951 entries, 0 to 62950
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   event_time    62951 non-null  object
 1   event_type    62951 non-null  object
 2   product_id    62951 non-null  object
 3   category_id   62951 non-null  object
 4   user_id       62951 non-null  object
 5   user_session  62951 non-null  object
dtypes: object(6)
memory usage: 2.9+ MB


In [4]:
print(test_df['event_type'].nunique())
print(test_df["product_id"].nunique())
print(test_df["category_id"].nunique())
print(test_df["user_id"].nunique())
print(test_df["user_session"].nunique())

4
17450
433
22665
30789


In [5]:
test_df["event_time"] = pd.to_datetime(test_df["event_time"], utc=True)

test_df["hour"] = test_df["event_time"].dt.hour
test_df["day"] = test_df["event_time"].dt.day
test_df["weekday"] = test_df["event_time"].dt.weekday  # 0 = Pazartesi, 6 = Pazar
test_df["is_weekend"] = (test_df["weekday"] >= 5).astype(int) # Hafta sonu kontrolü

In [6]:
# --- 1) Ürün ve kategori çeşitliliği + diğer özetler ---

# --- 1.1) Session bazlı özet ---
agg_features_test = test_df.groupby("user_session").agg(
    user_id=("user_id", "first"),          
    n_events=("event_type", "count"),      
    n_products=("product_id", "nunique"),  
    n_categories=("category_id", "nunique"),
    avg_hour=("hour", "mean"),             
    is_weekend=("is_weekend", "max"),      
).reset_index()

# --- 1.2) Yoğunluk & tekrar oranları ---
agg_features_test["product_repeat_rate"] = agg_features_test["n_events"] / (agg_features_test["n_products"] + 1)
agg_features_test["category_repeat_rate"] = agg_features_test["n_events"] / (agg_features_test["n_categories"] + 1)

# --- 1.3) Event frekansları ---
event_counts_test = test_df.pivot_table(
    index="user_session",
    columns="event_type",
    values="event_time",
    aggfunc="count",
    fill_value=0
).reset_index()

agg_features_test = agg_features_test.merge(event_counts_test, on="user_session", how="left")

# --- 1.4) Event oranları ---
agg_features_test["conversion_rate"] = agg_features_test["BUY"] / (agg_features_test["n_events"] + 1)
agg_features_test["add_cart_ratio"] = agg_features_test["ADD_CART"] / (agg_features_test["n_events"] + 1)
agg_features_test["remove_vs_add"] = agg_features_test["REMOVE_CART"] / (agg_features_test["ADD_CART"] + 1)
agg_features_test["view_to_buy_ratio"] = agg_features_test["BUY"] / (agg_features_test["VIEW"] + 1)
agg_features_test["buy_ratio"] = agg_features_test["BUY"] / (agg_features_test["VIEW"] + 1)

# --- 1.5) BUY pozisyonu () ---

# 1.5.1) Event sırası hesapla
test_df["event_index"] = test_df.groupby("user_session").cumcount() + 1  # her session’da event sırası

# 1.5.2) Session bazında ilk BUY pozisyonunu bul
buy_pos = test_df[test_df["event_type"] == "BUY"].groupby("user_session").agg(
    buy_position=("event_index", "min")
).reset_index()

# 1.5.3) Test setine merge et
agg_features_test = agg_features_test.merge(buy_pos, on="user_session", how="left")

# 1.5.4) Normalleştirilmiş pozisyon
# NaN olanlar = BUY yok, 1.0 ile dolduruyoruz (en sona kadar hiç buy gelmedi)
agg_features_test["buy_position_norm"] = agg_features_test["buy_position"] / (agg_features_test["n_events"] + 1)
agg_features_test["buy_position_norm"] = agg_features_test["buy_position_norm"].fillna(1.0)

# 1.5.5) Binary flag ekle (BUY var mı yok mu)
agg_features_test["has_buy"] = agg_features_test["buy_position"].notna().astype(int)

In [7]:
# --- 2) event_type frekansları  ---

agg_features_test = agg_features_test.merge(event_counts_test, on="user_session", how="left")

drop_cols = [c for c in agg_features_test.columns if '_x' in c or '_y' in c]
agg_features_test = agg_features_test.drop(columns=drop_cols)

In [8]:
# --- 3) Birleştirme ---

test_session = agg_features_test.merge(event_counts_test, on="user_session", how="left", sort=False)

In [9]:
# --- 4) event_time sütununu datetime'a çevir ---

test_session["hour"] = test_df["event_time"].dt.hour
test_session["day"] = test_df["event_time"].dt.day
test_session["weekday"] = test_df["event_time"].dt.weekday  # 0 = Pazartesi, 6 = Pazar
test_session["is_weekend"] = (test_df["event_time"].dt.weekday >= 5).astype(int) # Hafta sonu kontrolü

In [10]:
test_session.head()

Unnamed: 0,user_session,user_id,n_events,n_products,n_categories,avg_hour,is_weekend,product_repeat_rate,category_repeat_rate,conversion_rate,...,buy_position,buy_position_norm,has_buy,ADD_CART,BUY,REMOVE_CART,VIEW,hour,day,weekday
0,SESSION_000000,USER_027389,28,27,19,12.035714,1,1.0,1.4,0.0,...,,1.0,0,23,0,5,0,10,28,5
1,SESSION_000013,USER_096685,2,2,2,19.0,0,0.666667,0.666667,0.0,...,,1.0,0,0,0,0,2,11,25,2
2,SESSION_000022,USER_004363,1,1,1,15.0,0,0.5,0.5,0.0,...,,1.0,0,1,0,0,0,14,30,0
3,SESSION_000024,USER_041338,1,1,1,15.0,0,0.5,0.5,0.0,...,,1.0,0,0,0,0,1,22,30,0
4,SESSION_000025,USER_052581,3,3,2,12.333333,0,0.75,1.0,0.0,...,,1.0,0,1,0,2,0,16,26,3


In [11]:
test_session.shape

(30789, 24)

In [14]:
test_session = test_session.drop(columns=['buy_position'])

In [15]:
# Özellikler (hedefi ve kimlik kolonlarını çıkartıyoruz)
# Tüm sayısal kolonları seç
feature_cols = test_session.select_dtypes(include=['int64', 'float64', 'int32', 'float32']).columns.tolist()

# Hedef ve ID kolonlarını çıkar
feature_cols = [c for c in feature_cols if c not in ['session_value', 'user_id']]
feature_cols

['n_events',
 'n_products',
 'n_categories',
 'avg_hour',
 'is_weekend',
 'product_repeat_rate',
 'category_repeat_rate',
 'conversion_rate',
 'add_cart_ratio',
 'remove_vs_add',
 'view_to_buy_ratio',
 'buy_ratio',
 'buy_position_norm',
 'has_buy',
 'ADD_CART',
 'BUY',
 'REMOVE_CART',
 'VIEW',
 'hour',
 'day',
 'weekday']

In [16]:
X_test = test_session[feature_cols].values

scaler = joblib.load("scaler.save")
X_test_scaled = scaler.transform(X_test)

In [17]:
model_ann = load_model("final_model.h5")

In [19]:
test_pred_ann = model_ann.predict(X_test_scaled)

submission_df = pd.DataFrame({
    'user_session': agg_features_test['user_session'],
    'session_value': test_pred_ann.flatten()
})



In [None]:
original_order = test_df[["user_session"]].drop_duplicates()
submission_df = original_order.merge(submission_df, on="user_session", how="left")

submission_df.to_csv("sample_submission.csv", index=False)
print("sample_submission.csv dosyası kaydedildi ✅")

sample_submission_1.csv dosyası kaydedildi ✅
