In [1]:
# Gerekli kütüphaneler yükleniyor
import pandas as pd
import datetime as dt
from statsmodels.tsa.arima.model import ARIMA
import matplotlib.pyplot as plt

# Temizlenmiş veriyi oku
df = pd.read_csv('../outputs/cleaned_data.csv')

# Kolon adlarını Türkçeleştir
df.columns = [
    'FaturaNo', 'UrunKodu', 'UrunAciklamasi', 'Miktar',
    'FaturaTarihi', 'BirimFiyat', 'MusteriID', 'Ulke', 'ToplamFiyat'
]

# Fatura tarihini datetime formatına çevir (örnek: 12/01/2010 08:26)
df['FaturaTarihi'] = pd.to_datetime(df['FaturaTarihi'], dayfirst=True, errors='coerce')

# ekranda göster
df['FaturaTarihi']

0        2010-01-12 08:26:00
1        2010-01-12 08:26:00
2        2010-01-12 08:26:00
3        2010-01-12 08:26:00
4        2010-01-12 08:26:00
                 ...        
406824   2011-09-12 12:50:00
406825   2011-09-12 12:50:00
406826   2011-09-12 12:50:00
406827   2011-09-12 12:50:00
406828   2011-09-12 12:50:00
Name: FaturaTarihi, Length: 406829, dtype: datetime64[ns]

In [2]:
# Hatalı tarihleri çıkar
df = df[df['FaturaTarihi'].notnull()]

# Negatif ve eksik değerleri filtrele
df = df[(df['ToplamFiyat'] > 0) & (df['MusteriID'].notnull())]

# Ekranda göster
df

Unnamed: 0,FaturaNo,UrunKodu,UrunAciklamasi,Miktar,FaturaTarihi,BirimFiyat,MusteriID,Ulke,ToplamFiyat
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-01-12 08:26:00,2.55,17850.0,United Kingdom,15.30
1,536365,71053,WHITE METAL LANTERN,6,2010-01-12 08:26:00,3.39,17850.0,United Kingdom,20.34
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-01-12 08:26:00,2.75,17850.0,United Kingdom,22.00
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-01-12 08:26:00,3.39,17850.0,United Kingdom,20.34
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-01-12 08:26:00,3.39,17850.0,United Kingdom,20.34
...,...,...,...,...,...,...,...,...,...
406824,581587,22613,PACK OF 20 SPACEBOY NAPKINS,12,2011-09-12 12:50:00,0.85,12680.0,France,10.20
406825,581587,22899,CHILDREN'S APRON DOLLY GIRL,6,2011-09-12 12:50:00,2.10,12680.0,France,12.60
406826,581587,23254,CHILDRENS CUTLERY DOLLY GIRL,4,2011-09-12 12:50:00,4.15,12680.0,France,16.60
406827,581587,23255,CHILDRENS CUTLERY CIRCUS PARADE,4,2011-09-12 12:50:00,4.15,12680.0,France,16.60


In [3]:
# Snapshot date: veri kümesindeki en son tarihten 1 gün sonrası
snapshot_date = df['FaturaTarihi'].max() + dt.timedelta(days=1)

# Ekranda göster
snapshot_date

Timestamp('2011-12-11 17:19:00')

In [4]:
# RFM hesaplama
rfm = df.groupby('MusteriID').agg({
    'FaturaTarihi': lambda x: (snapshot_date - x.max()).days,
    'FaturaNo': 'nunique',
    'ToplamFiyat': 'sum'
})
rfm.columns = ['Recency', 'Frequency', 'Monetary']

# Ekranda göster
rfm

Unnamed: 0_level_0,Recency,Frequency,Monetary
MusteriID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
12347.0,96,5,2540.29
12348.0,221,1,367.00
12350.0,312,1,334.40
12352.0,275,3,1296.38
12355.0,97,1,459.40
...,...,...,...
18280.0,161,1,180.60
18281.0,5,1,80.82
18282.0,217,2,178.05
18283.0,61,5,565.65


In [5]:
# RFM skorlarını hesapla
rfm['R_Score'] = pd.qcut(rfm['Recency'], 4, labels=[4, 3, 2, 1])
rfm['F_Score'] = pd.qcut(rfm['Frequency'].rank(method='first'), 4, labels=[1, 2, 3, 4])
rfm['M_Score'] = pd.qcut(rfm['Monetary'], 4, labels=[1, 2, 3, 4])

# Skorları birleştir
rfm['RFM_Score'] = rfm['R_Score'].astype(str) + rfm['F_Score'].astype(str) + rfm['M_Score'].astype(str)

# Ekranda göster
rfm['RFM_Score']

MusteriID
12347.0    344
12348.0    112
12350.0    112
12352.0    134
12355.0    312
          ... 
18280.0    221
18281.0    421
18282.0    231
18283.0    343
18287.0    423
Name: RFM_Score, Length: 2997, dtype: object

In [6]:
# Segment fonksiyonu
def rfm_segment(row):
    if row['RFM_Score'] == '444':
        return 'Loyal Champion'
    elif row['R_Score'] == '4':
        return 'Recent Buyer'
    elif row['F_Score'] == '4':
        return 'Frequent Buyer'
    elif row['M_Score'] == '4':
        return 'Big Spender'
    elif row['R_Score'] == '1':
        return 'At Risk'
    else:
        return 'Others'

rfm['Segment'] = rfm.apply(rfm_segment, axis=1)

# Ekranda göster
rfm['Segment']

MusteriID
12347.0    Others
12348.0    Others
12350.0    Others
12352.0    Others
12355.0    Others
            ...  
18280.0    Others
18281.0    Others
18282.0    Others
18283.0    Others
18287.0    Others
Name: Segment, Length: 2997, dtype: object

In [7]:
# Sonuçları CSV olarak kaydet
rfm.to_csv('../outputs/rfm_segments.csv', index=True)

# İlk 5 sonucu göster
print("RFM segmentasyonu başarılı bir şekilde kaydedildi.")

# Ekranda Göster
rfm.head()

RFM segmentasyonu başarılı bir şekilde kaydedildi.


Unnamed: 0_level_0,Recency,Frequency,Monetary,R_Score,F_Score,M_Score,RFM_Score,Segment
MusteriID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
12347.0,96,5,2540.29,3,4,4,344,Others
12348.0,221,1,367.0,1,1,2,112,Others
12350.0,312,1,334.4,1,1,2,112,Others
12352.0,275,3,1296.38,1,3,4,134,Others
12355.0,97,1,459.4,3,1,2,312,Others
