In [None]:
# Mount Google Drive at /content/drive so later cells can read and write the
# CSV files stored under /content/drive/MyDrive (Colab-only dependency).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV


In [None]:
# Load the four raw CSV files from the mounted Drive folder, in the order
# train, features, stores, test.
train_data, features_data, stores_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/content/drive/MyDrive/archive/train.csv',
        '/content/drive/MyDrive/archive/features.csv',
        '/content/drive/MyDrive/archive/stores.csv',
        '/content/drive/MyDrive/archive/test.csv',
    )
)


In [None]:
# Merge train/test with features_data first, then merge that result onto
# stores_data. No join keys are passed, so pandas performs an inner join on
# every column name the two frames have in common.
train_df = stores_data.merge(train_data.merge(features_data))
test_df = stores_data.merge(test_data.merge(features_data))

In [None]:
# Persist both merged frames to Drive without writing the index column.
for output_file_path, merged_frame in (
    ('/content/drive/MyDrive/walmart/train.csv', train_df),
    ('/content/drive/MyDrive/walmart/test.csv', test_df),
):
    merged_frame.to_csv(output_file_path, index=False)

In [None]:
# Separate the predictor columns from the Weekly_Sales column, which is used
# as the regression target further down.
X_train = train_df.drop('Weekly_Sales', axis=1)
y_train = train_df['Weekly_Sales']

In [None]:
# One-hot encode the Store, Dept and Type columns.
categorical_columns = ['Store', 'Dept', 'Type']
X_train = pd.get_dummies(X_train, columns=categorical_columns)

In [None]:
# Parse the Date column (YYYY-MM-DD strings) into datetime values.
X_train = X_train.assign(
    Date=pd.to_datetime(X_train['Date'], format='%Y-%m-%d')
)

# Week-of-month feature: a month is treated as four weeks, with days 29, 30
# and 31 folded into the last one.
def calculate_week_of_month(date):
    """Return the 1-based week of the month (1-4) for ``date``.

    Days 1-7 map to week 1, 8-14 to week 2, 15-21 to week 3, and every day
    from the 22nd onward (including 29-31) to week 4.

    Parameters
    ----------
    date : object with an integer ``day`` attribute
        For example ``datetime.date`` or ``pandas.Timestamp``.

    Returns
    -------
    int
        Week index in the range 1..4.
    """
    week_index = (date.day - 1) // 7 + 1
    # Days 29-31 would land in a fifth week; cap them at week 4.
    return min(week_index, 4)

# Derive the week-of-month and calendar-quarter features from Date, then drop
# the raw Date column.
date_column = X_train['Date']
X_train = X_train.assign(**{
    'Week-of-month': date_column.apply(calculate_week_of_month),
    'Quarter': date_column.dt.quarter,
}).drop(columns=['Date'])

# One-hot encode the two derived features.
X_train = pd.get_dummies(X_train, columns=['Week-of-month', 'Quarter'])

In [None]:
# Convert every boolean column to integers.
boolean_columns = X_train.select_dtypes(include='bool').columns
X_train = X_train.astype({column: int for column in boolean_columns})

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Rescale every feature column to the 0-1 range (Weekly_Sales is not part of
# X_train, so the target is left untouched).
# NOTE(review): the scaler is fitted on all rows before the train/test split
# further down, so hold-out rows influence the scaling - confirm this is
# acceptable.
scaler = MinMaxScaler()
scaled_values = scaler.fit(X_train).transform(X_train)
X_train = pd.DataFrame(scaled_values, columns=X_train.columns)

In [None]:
from sklearn.impute import SimpleImputer

# The stated intent of this step is to fill missing MarkDown values with 0.
# The features were already min-max scaled by `scaler`, so a literal 0 would
# stand for each column's minimum rather than a raw value of zero.
# MinMaxScaler maps x to x * scale_ + min_, so a raw 0 lands exactly on min_
# for that column; fill with that value instead.
markdown_columns = ['MarkDown1', 'MarkDown2', 'MarkDown3', 'MarkDown4', 'MarkDown5']
scaled_zero_by_column = {
    column: scaler.min_[X_train.columns.get_loc(column)]
    for column in markdown_columns
}
X_train[markdown_columns] = X_train[markdown_columns].fillna(value=scaled_zero_by_column)

# Fill any remaining missing values with the column mean.
# NOTE(review): the imputer is fitted before the train/test split further
# down, so hold-out rows contribute to the means - confirm this is acceptable.
imputer = SimpleImputer(strategy='mean')
X_train = pd.DataFrame(imputer.fit_transform(X_train), columns=X_train.columns)

X_train.head()

Unnamed: 0,Size,IsHoliday,Temperature,Fuel_Price,MarkDown1,MarkDown2,MarkDown3,MarkDown4,MarkDown5,CPI,...,Type_B,Type_C,Week-of-month_1,Week-of-month_2,Week-of-month_3,Week-of-month_4,Quarter_1,Quarter_2,Quarter_3,Quarter_4
0,0.630267,0.0,0.434149,0.0501,0.0,0.0,0.0,0.0,0.0,0.8405,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,0.630267,0.0,0.434149,0.0501,0.0,0.0,0.0,0.0,0.0,0.8405,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,0.630267,0.0,0.434149,0.0501,0.0,0.0,0.0,0.0,0.0,0.8405,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
3,0.630267,0.0,0.434149,0.0501,0.0,0.0,0.0,0.0,0.0,0.8405,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
4,0.630267,0.0,0.434149,0.0501,0.0,0.0,0.0,0.0,0.0,0.8405,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.ensemble import GradientBoostingRegressor

# Hold out 20% of the rows for evaluation (fixed seed for repeatability).
# NOTE(review): this rebinds X_train / y_train to the training portion, so
# running the cell a second time splits the already-split data again.
split_parts = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42
)
X_train, X_test, y_train, y_test = split_parts

# Build a Lasso regression model and fit it on the training portion.
lasso_regressor = Lasso(alpha=1.0)
lasso_regressor.fit(X=X_train, y=y_train)

In [None]:
from sklearn.metrics import mean_squared_error, r2_score

# Predict on the hold-out set and score the model on ALL hold-out rows, so
# that the reported RMSE and R-squared describe the same data. (Previously the
# MSE was computed on a 30-row slice only, which made the printed RMSE a
# statistic of that small sample rather than of the model.)
y_pred = lasso_regressor.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = mse ** 0.5
r2 = r2_score(y_test, y_pred)

# Print the first 30 actual/predicted pairs as a quick sanity check.
preview_rows = 30
results = pd.DataFrame({
    'Gerçek Değer': y_test.iloc[:preview_rows].values,
    'Tahmin': y_pred[:preview_rows],
})
print(results)
print(f'Root Mean Squared Error (RMSE): {rmse}')
print(f'R-squared: {r2}')

    Gerçek Değer        Tahmin
0       15189.41  22602.078709
1        8986.30  19386.350285
2       34429.75  37869.256813
3        1663.64  -2462.026344
4       24777.46  28809.473496
5        1081.00  -2929.588817
6       31136.36  28741.890180
7        9471.58  16500.816316
8       13365.04  11597.497726
9         968.45  14144.849539
10      15260.93  10473.086832
11       2317.71  10121.945436
12         88.89  -8791.311415
13      13757.75  23074.450113
14       4055.48   -731.588801
15      13121.67  23688.819593
16      32521.56  23941.194967
17      21449.32  24893.597461
18       7474.88  10788.717062
19       5417.47  15277.931691
20      51881.84  74967.401625
21      13032.14   3065.772660
22         67.04  -2449.162277
23        244.84  10793.921804
24        232.78  -9196.188446
25      42665.28  38840.726206
26         64.65  10139.920686
27      14381.97  22170.245229
28       5985.37   8497.744870
29      24709.89  42335.941894
Root Mean Squared Error (RMSE): 8881.68