Data

In [1]:
import os
import pandas as pd
import numpy as np

In [None]:

# Tüm görüntüleme sınırlarını kaldırma
# pd.set_option('display.max_rows', None)     # Tüm satırları göster
pd.set_option('display.max_columns', None)  # Tüm sütunları göster
pd.set_option('display.width', None)        # Satır genişlik sınırını kaldır
pd.set_option('display.max_colwidth', None) # Sütun içerik uzunluğu sınırını kaldır

In [None]:
df = pd.read_csv('tests/data.csv', parse_dates=["Date"])
print(df.head(10))

In [None]:
print(df.sample(10))

In [None]:
print(df.head())
print(df.info())


In [None]:
# Value Predictor - Complete Data Science Project

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import classification_report, confusion_matrix, mean_squared_error, r2_score
import warnings
warnings.filterwarnings('ignore')

# Display settings
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

# Data Loading and Initial Exploration
df = pd.read_csv('data.csv', parse_dates=["Date"])
print("Initial Data Overview:")
print(df.head(10))
print("\nRandom Sample:")
print(df.sample(10))
print("\nData Info:")
print(df.info())
print("\nData Description:")
print(df.describe())

# Data Type Conversion
print("\n--- Data Type Conversion ---")
# Convert Date to datetime if not already
df['Date'] = pd.to_datetime(df['Date'])

# Convert numerical columns to appropriate types
numerical_columns = ['Bitcoin', 'Gold', 'Silver', 'Copper', 'Platinum', 'Palladium', 
                    'Nasdaq', 'Spy', 'BrentOil', 'CrudeOil', 'HeatingOil', 'NaturalGas']

for col in numerical_columns:
    if col in df.columns:
        df[col] = pd.to_numeric(df[col], errors='coerce')

print("Updated Data Info:")
print(df.info())

# Statistical Analysis and Outlier Detection
print("\n--- Statistical Analysis and Outlier Detection ---")

# Find outliers using IQR method
numeric_cols = df.select_dtypes(include=[np.number]).columns
min_values = []
max_values = []

for column in numeric_cols:
    Q1 = df[column].quantile(0.25)
    Q3 = df[column].quantile(0.75)
    IQR = Q3 - Q1
    min_value = Q1 - 1.5 * IQR
    max_value = Q3 + 1.5 * IQR
    min_values.append(min_value)
    max_values.append(max_value)
    print(f"Column: {column}, min: {min_value:.2f}, max: {max_value:.2f}")

# Remove outliers
initial_shape = df.shape
for i, column in enumerate(numeric_cols):
    df = df[(df[column] >= min_values[i]) & (df[column] <= max_values[i])]

print(f"\nData shape before outlier removal: {initial_shape}")
print(f"Data shape after outlier removal: {df.shape}")
print(f"Removed {initial_shape[0] - df.shape[0]} outlier rows")

print("\nCleaned Data Description:")
print(df.describe())

# Correlation Analysis
print("\n--- Correlation Analysis ---")
correlation_matrix = df[numeric_cols].corr()
print("Correlation Matrix:")
print(correlation_matrix)

# Feature Engineering
print("\n--- Feature Engineering ---")
# Add time-based features
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month
df['DayOfWeek'] = df['Date'].dt.dayofweek
df['Quarter'] = df['Date'].dt.quarter

# Add moving averages (if enough data)
if len(df) > 30:
    for col in ['Bitcoin', 'Gold', 'Nasdaq']:
        if col in df.columns:
            df[f'{col}_MA7'] = df[col].rolling(window=7).mean()
            df[f'{col}_MA30'] = df[col].rolling(window=30).mean()

# Drop rows with NaN values created by moving averages
df = df.dropna()

print(f"Data shape after feature engineering: {df.shape}")

# Model Preparation
print("\n--- Model Preparation ---")

# Define features for modeling (excluding Date and target)
feature_columns = [col for col in df.columns if col not in ['Date']]
categorical_features = ['Year', 'Month', 'DayOfWeek', 'Quarter']
numerical_features = [col for col in feature_columns if col not in categorical_features]

# Choose target variable (Bitcoin for this example)
target_variable = 'Bitcoin'
X_features = [col for col in feature_columns if col != target_variable]

print(f"Target variable: {target_variable}")
print(f"Feature columns: {X_features}")
print(f"Categorical features: {categorical_features}")
print(f"Numerical features: {[col for col in numerical_features if col != target_variable]}")

# Prepare data for modeling
X = df[X_features]
y = df[target_variable]

# Create preprocessing pipeline
numerical_features_final = [col for col in numerical_features if col != target_variable and col in X_features]
categorical_features_final = [col for col in categorical_features if col in X_features]

full_pipeline = ColumnTransformer([
    ('num', StandardScaler(), numerical_features_final),
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features_final)
])

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(f"Training set size: {X_train.shape}")
print(f"Test set size: {X_test.shape}")

# Linear Regression Model
print("\n--- Linear Regression Model ---")

regression_model = Pipeline([
    ('preparation', full_pipeline),
    ('model', LinearRegression())
])

regression_model.fit(X_train, y_train)
y_pred_reg = regression_model.predict(X_test)

# Regression metrics
mse = mean_squared_error(y_test, y_pred_reg)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred_reg)

print(f"MSE: {mse:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"R²: {r2:.4f}")

# Custom tolerance metrics
def tolerance_r2(y_true, y_pred, tolerance):
    residuals = y_pred - y_true
    residuals[np.abs(residuals) <= tolerance] = 0
    ssr = np.sum(residuals**2)
    sst = np.sum((y_true - np.mean(y_true))**2)
    return 1 - (ssr / sst)

def tolerance_percentage_r2(y_true, y_pred, tolerance):
    residuals = y_pred - y_true
    residuals[(np.abs(residuals) / y_true) <= tolerance] = 0
    ssr = np.sum(residuals**2)
    sst = np.sum((y_true - np.mean(y_true))**2)
    return 1 - (ssr / sst)

print(f"Standard R²: {r2_score(y_test, y_pred_reg):.4f}")
print(f"Tolerance R² (±1000): {tolerance_r2(y_test, y_pred_reg, 1000):.4f}")
print(f"Tolerance R² (±10%): {tolerance_percentage_r2(y_test, y_pred_reg, 0.10):.4f}")

# Classification Model (Price Categories)
print("\n--- Classification Model ---")

# Create price categories for classification
y_classification = y.copy()
bins = np.percentile(y_classification, [0, 20, 40, 60, 80, 100])
labels = [1, 2, 3, 4, 5]
y_classification = pd.cut(y_classification, bins=bins, labels=labels, include_lowest=True)

print(f"Price bins: {bins}")
print(f"Price categories distribution:")
print(y_classification.value_counts().sort_index())

# Split data for classification
X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(
    X, y_classification, test_size=0.2, random_state=42
)

# Classification pipeline
classification_model = Pipeline([
    ('preparation', full_pipeline),
    ('model', RandomForestClassifier(n_estimators=100, random_state=42))
])

classification_model.fit(X_train_class, y_train_class)
y_pred_class = classification_model.predict(X_test_class)

# Classification metrics
print("\nConfusion Matrix:")
print(confusion_matrix(y_test_class, y_pred_class))
print("\nClassification Report:")
print(classification_report(y_test_class, y_pred_class))

# Feature Importance Analysis
print("\n--- Feature Importance Analysis ---")

# Get feature names after preprocessing
feature_names = (numerical_features_final + 
                list(classification_model.named_steps['preparation']
                    .named_transformers_['cat'].get_feature_names_out(categorical_features_final)))

# Random Forest feature importance
rf_importance = classification_model.named_steps['model'].feature_importances_
feature_importance_df = pd.DataFrame({
    'feature': feature_names,
    'importance': rf_importance
}).sort_values('importance', ascending=False)

print("Top 10 Most Important Features:")
print(feature_importance_df.head(10))

# Linear Regression coefficients
lr_coef = regression_model.named_steps['model'].coef_
coef_df = pd.DataFrame({
    'feature': feature_names,
    'coefficient': lr_coef
}).sort_values('coefficient', key=abs, ascending=False)

print("\nTop 10 Features by Linear Regression Coefficient:")
print(coef_df.head(10))

# Prediction Example
print("\n--- Prediction Example ---")

# Create a sample prediction
if len(df) > 0:
    sample_data = df[X_features].iloc[-1:].copy()  # Use last row as example
    
    print("Sample input data:")
    print(sample_data)
    
    reg_prediction = regression_model.predict(sample_data)
    class_prediction = classification_model.predict(sample_data)
    
    print(f"\nRegression prediction: {reg_prediction[0]:.2f}")
    print(f"Classification prediction: Category {class_prediction[0]}")
    print(f"Actual value: {df[target_variable].iloc[-1]:.2f}")

# Save cleaned data
df.to_csv('data_cleaned.csv', index=False)
print(f"\nCleaned data saved to 'data_cleaned.csv'")
print(f"Final dataset shape: {df.shape}")

# Model Performance Summary
print("\n--- Model Performance Summary ---")
print(f"Linear Regression R²: {r2:.4f}")
print(f"Classification Accuracy: {(y_pred_class == y_test_class).mean():.4f}")
print(f"Data points used: {len(df)}")
print(f"Features used: {len(X_features)}")

In [2]:
import os
import pandas as pd
import numpy as np

In [5]:
btc = pd.read_csv('price/Bitcoin.csv')
gold = pd.read_csv('price/Gold.csv')
silver = pd.read_csv('price/Silver.csv')

In [6]:
btc.head(10)

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
0,06/20/2025,103280.5,104669.6,106520.3,102396.8,51.48K,-1.33%
1,06/19/2025,104669.6,104894.2,105220.5,103934.5,28.47K,-0.21%
2,06/18/2025,104894.2,104571.4,105553.0,103622.4,45.60K,0.32%
3,06/17/2025,104559.8,106769.8,107747.4,103386.3,60.73K,-2.04%
4,06/16/2025,106740.4,105594.0,108899.3,104993.8,50.95K,1.09%
5,06/15/2025,105591.8,105421.6,106098.9,104502.0,26.22K,0.16%
6,06/14/2025,105421.5,106028.5,106196.9,104321.0,30.99K,-0.62%
7,06/13/2025,106080.3,105702.7,106147.0,102739.6,73.84K,0.36%
8,06/12/2025,105703.0,108662.2,108776.2,105703.0,59.87K,-2.72%
9,06/11/2025,108661.2,110269.2,110386.1,108081.9,44.80K,-1.46%


In [7]:
btc.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4748 entries, 0 to 4747
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Date      4748 non-null   object
 1   Price     4748 non-null   object
 2   Open      4748 non-null   object
 3   High      4748 non-null   object
 4   Low       4748 non-null   object
 5   Vol.      4748 non-null   object
 6   Change %  4748 non-null   object
dtypes: object(7)
memory usage: 259.8+ KB


In [9]:
df = pd.DataFrame()

In [10]:
df['Date'] = btc['Date']

In [11]:
df.head(10)

Unnamed: 0,Date
0,06/20/2025
1,06/19/2025
2,06/18/2025
3,06/17/2025
4,06/16/2025
5,06/15/2025
6,06/14/2025
7,06/13/2025
8,06/12/2025
9,06/11/2025


In [13]:
df.head(10)

Unnamed: 0,Date,Bitcoin
0,06/20/2025,103280.5
1,06/19/2025,104669.6
2,06/18/2025,104894.2
3,06/17/2025,104559.8
4,06/16/2025,106740.4
5,06/15/2025,105591.8
6,06/14/2025,105421.5
7,06/13/2025,106080.3
8,06/12/2025,105703.0
9,06/11/2025,108661.2


In [14]:
df['Gold'] = gold.iloc[:, 1]

In [15]:
df.head(10)

Unnamed: 0,Date,Bitcoin,Gold
0,06/20/2025,103280.5,3385.7
1,06/19/2025,104669.6,3384.97
2,06/18/2025,104894.2,3408.1
3,06/17/2025,104559.8,3406.9
4,06/16/2025,106740.4,3417.3
5,06/15/2025,105591.8,3464.22
6,06/14/2025,105421.5,3452.8
7,06/13/2025,106080.3,3402.4
8,06/12/2025,105703.0,3343.7
9,06/11/2025,108661.2,3343.4


In [16]:
df['Silver'] = silver.iloc[:, 1]


In [17]:
df.head(10)

Unnamed: 0,Date,Bitcoin,Gold,Silver
0,06/20/2025,103280.5,3385.7,36.017
1,06/19/2025,104669.6,3384.97,36.368
2,06/18/2025,104894.2,3408.1,36.913
3,06/17/2025,104559.8,3406.9,37.151
4,06/16/2025,106740.4,3417.3,36.448
5,06/15/2025,105591.8,3464.22,36.373
6,06/14/2025,105421.5,3452.8,36.355
7,06/13/2025,106080.3,3402.4,36.295
8,06/12/2025,105703.0,3343.7,36.261
9,06/11/2025,108661.2,3343.4,36.642


In [None]:
btc = pd.read_csv('price/Bitcoin.csv')
gold = pd.read_csv('price/Gold.csv')
silver = pd.read_csv('price/Silver.csv')
copper = pd.read_csv('price/Copper.csv')
platinum = pd.read_csv('price/Platinum.csv')
palladium = pd.read_csv('price/Palladium.csv')
aluminium = pd.read_csv('price/Aluminium.csv')
lead = pd.read_csv('price/Lead.csv')
zinc = pd.read_csv('price/Zinc.csv')
nickel = pd.read_csv('price/Nickel.csv')
tin = pd.read_csv('price/Tin.csv')

In [None]:
df['Bitcoin'] = btc.iloc[:, 1]
df['Gold'] = gold.iloc[:, 1]
df['Silver'] = silver.iloc[:, 1]
df['Copper'] = copper.iloc[:, 1]
df['Platinum'] = platinum.iloc[:, 1]
df['Palladium'] = palladium.iloc[:, 1]
df['Aluminium'] = aluminium.iloc[:, 1]
df['Lead'] = lead.iloc[:, 1]
df['Zinc'] = zinc.iloc[:, 1]
df['Nickel'] = nickel.iloc[:, 1]
df['Tin'] = tin.iloc[:, 1]

In [24]:
df.head(10)

Unnamed: 0,Date,Bitcoin,Gold,Silver,Copper,Platinum,Palladium,Aluminium,Lead,Zinc,Nickel,Tin
0,06/20/2025,103280.5,3385.7,36.017,4.8335,1264.5,1054.9,2556.5,1992.63,2640.75,14965.63,32815.99
1,06/19/2025,104669.6,3384.97,36.368,4.8135,1295.85,1052.75,2527.25,1993.73,2645.0,15003.38,32041.0
2,06/18/2025,104894.2,3408.1,36.913,4.853,1313.1,1059.8,2545.0,1995.68,2640.8,15039.75,32324.0
3,06/17/2025,104559.8,3406.9,37.151,4.809,1261.7,1059.4,2547.7,1981.68,2639.25,14898.88,32216.01
4,06/16/2025,106740.4,3417.3,36.448,4.836,1253.2,1037.8,2518.4,2009.88,2661.4,15026.13,32553.0
5,06/15/2025,105591.8,3464.22,36.373,4.786,1226.25,1043.0,2506.05,1997.03,2627.0,15151.88,32603.5
6,06/14/2025,105421.5,3452.8,36.355,4.8145,1211.9,1046.3,2521.7,1996.53,2644.75,15106.88,32563.9
7,06/13/2025,106080.3,3402.4,36.295,4.8355,1275.1,1065.9,2517.95,1989.4,2654.6,15155.63,32570.98
8,06/12/2025,105703.0,3343.7,36.261,4.8145,1259.6,1092.6,2492.35,1986.98,2657.3,15282.75,32706.0
9,06/11/2025,108661.2,3343.4,36.642,4.9,1212.5,1078.8,2480.65,1992.28,2652.5,15372.88,32724.0


In [25]:
brent = pd.read_csv('price/BrentOil.csv')
crude = pd.read_csv('price/CrudeOil.csv')
heating = pd.read_csv('price/HeatingOil.csv')
gasoline = pd.read_csv('price/Gasoline.csv')
natgas = pd.read_csv('price/NaturalGas.csv')


In [26]:
df['BrentOil'] = brent.iloc[:, 1]
df['CrudeOil'] = crude.iloc[:, 1]
df['HeatingOil'] = heating.iloc[:, 1]
df['Gasoline'] = gasoline.iloc[:, 1]
df['NaturalGas'] = natgas.iloc[:, 1]

In [27]:
df.head(10)

Unnamed: 0,Date,Bitcoin,Gold,Silver,Copper,Platinum,Palladium,Aluminium,Lead,Zinc,Nickel,Tin,BrentOil,CrudeOil,HeatingOil,Gasoline,NaturalGas
0,06/20/2025,103280.5,3385.7,36.017,4.8335,1264.5,1054.9,2556.5,1992.63,2640.75,14965.63,32815.99,77.01,74.93,2.5418,2.3295,3.847
1,06/19/2025,104669.6,3384.97,36.368,4.8135,1295.85,1052.75,2527.25,1993.73,2645.0,15003.38,32041.0,78.85,73.82,2.5959,2.3668,4.086
2,06/18/2025,104894.2,3408.1,36.913,4.853,1313.1,1059.8,2545.0,1995.68,2640.8,15039.75,32324.0,76.7,75.14,2.5334,2.3086,3.989
3,06/17/2025,104559.8,3406.9,37.151,4.809,1261.7,1059.4,2547.7,1981.68,2639.25,14898.88,32216.01,76.45,74.84,2.5051,2.2719,3.851
4,06/16/2025,106740.4,3417.3,36.448,4.836,1253.2,1037.8,2518.4,2009.88,2661.4,15026.13,32553.0,73.23,71.77,2.3933,2.2199,3.748
5,06/15/2025,105591.8,3464.22,36.373,4.786,1226.25,1043.0,2506.05,1997.03,2627.0,15151.88,32603.5,74.23,72.72,2.4339,2.2592,3.67
6,06/14/2025,105421.5,3452.8,36.355,4.8145,1211.9,1046.3,2521.7,1996.53,2644.75,15106.88,32563.9,69.36,71.29,2.3587,2.2276,3.581
7,06/13/2025,106080.3,3402.4,36.295,4.8355,1275.1,1065.9,2517.95,1989.4,2654.6,15155.63,32570.98,69.77,66.64,2.1887,2.1429,3.492
8,06/12/2025,105703.0,3343.7,36.261,4.8145,1259.6,1092.6,2492.35,1986.98,2657.3,15282.75,32706.0,66.87,66.9,2.2053,2.1668,3.507
9,06/11/2025,108661.2,3343.4,36.642,4.9,1212.5,1078.8,2480.65,1992.28,2652.5,15372.88,32724.0,67.04,63.91,2.1416,2.088,3.533


In [28]:
us30 = pd.read_csv('price/US30.csv')
us500 = pd.read_csv('price/US500.csv')
sp500 = pd.read_csv('price/S&P500.csv')
dowjones = pd.read_csv('price/DowJones.csv')
nasdaq = pd.read_csv('price/NASDAQ.csv')
cboe = pd.read_csv('price/CBOE.csv')
dxy = pd.read_csv('price/USDollarIndex.csv')

In [29]:
df['US30'] = us30.iloc[:, 1]
df['US500'] = us500.iloc[:, 1]
df['S&P500'] = sp500.iloc[:, 1]
df['DowJones'] = dowjones.iloc[:, 1]
df['NASDAQ'] = nasdaq.iloc[:, 1]
df['CBOE'] = cboe.iloc[:, 1]
df['USDollarIndex'] = dxy.iloc[:, 1]

In [30]:
df.head(10)

Unnamed: 0,Date,Bitcoin,Gold,Silver,Copper,Platinum,Palladium,Aluminium,Lead,Zinc,...,HeatingOil,Gasoline,NaturalGas,US30,US500,S&P500,DowJones,NASDAQ,CBOE,USDollarIndex
0,06/20/2025,103280.5,3385.7,36.017,4.8335,1264.5,1054.9,2556.5,1992.63,2640.75,...,2.5418,2.3295,3.847,42254.3,5973.8,5967.84,42206.82,19447.41,20.62,98.282
1,06/19/2025,104669.6,3384.97,36.368,4.8135,1295.85,1052.75,2527.25,1993.73,2645.0,...,2.5959,2.3668,4.086,42018.0,5962.9,5980.87,42171.66,19546.27,22.17,98.468
2,06/18/2025,104894.2,3408.1,36.913,4.853,1313.1,1059.8,2545.0,1995.68,2640.8,...,2.5334,2.3086,3.989,42171.0,5980.5,5982.72,42215.8,19521.09,20.14,98.468
3,06/17/2025,104559.8,3406.9,37.151,4.809,1261.7,1059.4,2547.7,1981.68,2639.25,...,2.5051,2.2719,3.851,42112.0,5968.8,6033.11,42515.09,19701.21,21.6,98.394
4,06/16/2025,106740.4,3417.3,36.448,4.836,1253.2,1037.8,2518.4,2009.88,2661.4,...,2.3933,2.2199,3.748,42274.5,5995.5,5976.97,42197.79,19406.83,19.11,97.702
5,06/15/2025,105591.8,3464.22,36.373,4.786,1226.25,1043.0,2506.05,1997.03,2627.0,...,2.4339,2.2592,3.67,42202.0,5984.3,6045.26,42967.62,19662.48,20.82,98.176
6,06/14/2025,105421.5,3452.8,36.355,4.8145,1211.9,1046.3,2521.7,1996.53,2644.75,...,2.3587,2.2276,3.581,42160.1,5975.5,6022.24,42865.77,19615.88,18.02,97.901
7,06/13/2025,106080.3,3402.4,36.295,4.8355,1275.1,1065.9,2517.95,1989.4,2654.6,...,2.1887,2.1429,3.492,42760.5,6011.1,6038.81,42866.87,19714.99,17.26,98.604
8,06/12/2025,105703.0,3343.7,36.261,4.8145,1259.6,1092.6,2492.35,1986.98,2657.3,...,2.2053,2.1668,3.507,42814.0,6014.3,6005.88,42761.76,19591.24,16.95,99.07
9,06/11/2025,108661.2,3343.4,36.642,4.9,1212.5,1078.8,2480.65,1992.28,2652.5,...,2.1416,2.088,3.533,42852.5,6037.1,6000.36,42762.87,19529.95,17.16,98.442


In [31]:
df['Date'] = pd.to_datetime(df['Date'], format='%m/%d/%Y', errors='coerce')


In [32]:
df['Date'] = df['Date'].dt.strftime('%d/%m/%Y')

In [33]:
df.head(10)

Unnamed: 0,Date,Bitcoin,Gold,Silver,Copper,Platinum,Palladium,Aluminium,Lead,Zinc,...,HeatingOil,Gasoline,NaturalGas,US30,US500,S&P500,DowJones,NASDAQ,CBOE,USDollarIndex
0,20/06/2025,103280.5,3385.7,36.017,4.8335,1264.5,1054.9,2556.5,1992.63,2640.75,...,2.5418,2.3295,3.847,42254.3,5973.8,5967.84,42206.82,19447.41,20.62,98.282
1,19/06/2025,104669.6,3384.97,36.368,4.8135,1295.85,1052.75,2527.25,1993.73,2645.0,...,2.5959,2.3668,4.086,42018.0,5962.9,5980.87,42171.66,19546.27,22.17,98.468
2,18/06/2025,104894.2,3408.1,36.913,4.853,1313.1,1059.8,2545.0,1995.68,2640.8,...,2.5334,2.3086,3.989,42171.0,5980.5,5982.72,42215.8,19521.09,20.14,98.468
3,17/06/2025,104559.8,3406.9,37.151,4.809,1261.7,1059.4,2547.7,1981.68,2639.25,...,2.5051,2.2719,3.851,42112.0,5968.8,6033.11,42515.09,19701.21,21.6,98.394
4,16/06/2025,106740.4,3417.3,36.448,4.836,1253.2,1037.8,2518.4,2009.88,2661.4,...,2.3933,2.2199,3.748,42274.5,5995.5,5976.97,42197.79,19406.83,19.11,97.702
5,15/06/2025,105591.8,3464.22,36.373,4.786,1226.25,1043.0,2506.05,1997.03,2627.0,...,2.4339,2.2592,3.67,42202.0,5984.3,6045.26,42967.62,19662.48,20.82,98.176
6,14/06/2025,105421.5,3452.8,36.355,4.8145,1211.9,1046.3,2521.7,1996.53,2644.75,...,2.3587,2.2276,3.581,42160.1,5975.5,6022.24,42865.77,19615.88,18.02,97.901
7,13/06/2025,106080.3,3402.4,36.295,4.8355,1275.1,1065.9,2517.95,1989.4,2654.6,...,2.1887,2.1429,3.492,42760.5,6011.1,6038.81,42866.87,19714.99,17.26,98.604
8,12/06/2025,105703.0,3343.7,36.261,4.8145,1259.6,1092.6,2492.35,1986.98,2657.3,...,2.2053,2.1668,3.507,42814.0,6014.3,6005.88,42761.76,19591.24,16.95,99.07
9,11/06/2025,108661.2,3343.4,36.642,4.9,1212.5,1078.8,2480.65,1992.28,2652.5,...,2.1416,2.088,3.533,42852.5,6037.1,6000.36,42762.87,19529.95,17.16,98.442


In [34]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4748 entries, 0 to 4747
Data columns (total 24 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Date           4748 non-null   object 
 1   Bitcoin        4748 non-null   object 
 2   Gold           3324 non-null   object 
 3   Silver         3369 non-null   float64
 4   Copper         3363 non-null   float64
 5   Platinum       3812 non-null   object 
 6   Palladium      3619 non-null   object 
 7   Aluminium      2785 non-null   object 
 8   Lead           3285 non-null   object 
 9   Zinc           3287 non-null   object 
 10  Nickel         3193 non-null   object 
 11  Tin            3286 non-null   object 
 12  BrentOil       3358 non-null   float64
 13  CrudeOil       3407 non-null   float64
 14  HeatingOil     3411 non-null   float64
 15  Gasoline       3360 non-null   float64
 16  NaturalGas     3407 non-null   float64
 17  US30           3607 non-null   object 
 18  US500   

In [36]:
df = df.ffill()
df = df.bfill()

In [37]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4748 entries, 0 to 4747
Data columns (total 24 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Date           4748 non-null   object 
 1   Bitcoin        4748 non-null   object 
 2   Gold           4748 non-null   object 
 3   Silver         4748 non-null   float64
 4   Copper         4748 non-null   float64
 5   Platinum       4748 non-null   object 
 6   Palladium      4748 non-null   object 
 7   Aluminium      4748 non-null   object 
 8   Lead           4748 non-null   object 
 9   Zinc           4748 non-null   object 
 10  Nickel         4748 non-null   object 
 11  Tin            4748 non-null   object 
 12  BrentOil       4748 non-null   float64
 13  CrudeOil       4748 non-null   float64
 14  HeatingOil     4748 non-null   float64
 15  Gasoline       4748 non-null   float64
 16  NaturalGas     4748 non-null   float64
 17  US30           4748 non-null   object 
 18  US500   

In [39]:
df['Date'] = pd.to_datetime(df['Date'], errors='coerce', dayfirst=True)
df['Date'] = df['Date'].astype('category')

In [41]:
df['Bitcoin'] = df['Bitcoin'].astype(float)
df['Gold'] = df['Gold'].astype(float)
df['Silver'] = df['Silver'].astype(float)
df['Copper'] = df['Copper'].astype(float)
df['Platinum'] = df['Platinum'].astype(float)
df['Palladium'] = df['Palladium'].astype(float)
df['Aluminium'] = df['Aluminium'].astype(float)
df['Lead'] = df['Lead'].astype(float)
df['Zinc'] = df['Zinc'].astype(float)
df['Nickel'] = df['Nickel'].astype(float)
df['Tin'] = df['Tin'].astype(float)
df['BrentOil'] = df['BrentOil'].astype(float)
df['CrudeOil'] = df['CrudeOil'].astype(float)
df['HeatingOil'] = df['HeatingOil'].astype(float)
df['Gasoline'] = df['Gasoline'].astype(float)
df['NaturalGas'] = df['NaturalGas'].astype(float)
df['US30'] = df['US30'].astype(float)
df['US500'] = df['US500'].astype(float)
df['S&P500'] = df['S&P500'].astype(float)
df['DowJones'] = df['DowJones'].astype(float)
df['NASDAQ'] = df['NASDAQ'].astype(float)
df['CBOE'] = df['CBOE'].astype(float)
df['USDollarIndex'] = df['USDollarIndex'].astype(float)

ValueError: could not convert string to float: '103,280.5'

In [42]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4748 entries, 0 to 4747
Data columns (total 24 columns):
 #   Column         Non-Null Count  Dtype   
---  ------         --------------  -----   
 0   Date           4748 non-null   category
 1   Bitcoin        4748 non-null   object  
 2   Gold           4748 non-null   object  
 3   Silver         4748 non-null   float64 
 4   Copper         4748 non-null   float64 
 5   Platinum       4748 non-null   object  
 6   Palladium      4748 non-null   object  
 7   Aluminium      4748 non-null   object  
 8   Lead           4748 non-null   object  
 9   Zinc           4748 non-null   object  
 10  Nickel         4748 non-null   object  
 11  Tin            4748 non-null   object  
 12  BrentOil       4748 non-null   float64 
 13  CrudeOil       4748 non-null   float64 
 14  HeatingOil     4748 non-null   float64 
 15  Gasoline       4748 non-null   float64 
 16  NaturalGas     4748 non-null   float64 
 17  US30           4748 non-null   ob

In [43]:
df.head(10)

Unnamed: 0,Date,Bitcoin,Gold,Silver,Copper,Platinum,Palladium,Aluminium,Lead,Zinc,...,HeatingOil,Gasoline,NaturalGas,US30,US500,S&P500,DowJones,NASDAQ,CBOE,USDollarIndex
0,2025-06-20,103280.5,3385.7,36.017,4.8335,1264.5,1054.9,2556.5,1992.63,2640.75,...,2.5418,2.3295,3.847,42254.3,5973.8,5967.84,42206.82,19447.41,20.62,98.282
1,2025-06-19,104669.6,3384.97,36.368,4.8135,1295.85,1052.75,2527.25,1993.73,2645.0,...,2.5959,2.3668,4.086,42018.0,5962.9,5980.87,42171.66,19546.27,22.17,98.468
2,2025-06-18,104894.2,3408.1,36.913,4.853,1313.1,1059.8,2545.0,1995.68,2640.8,...,2.5334,2.3086,3.989,42171.0,5980.5,5982.72,42215.8,19521.09,20.14,98.468
3,2025-06-17,104559.8,3406.9,37.151,4.809,1261.7,1059.4,2547.7,1981.68,2639.25,...,2.5051,2.2719,3.851,42112.0,5968.8,6033.11,42515.09,19701.21,21.6,98.394
4,2025-06-16,106740.4,3417.3,36.448,4.836,1253.2,1037.8,2518.4,2009.88,2661.4,...,2.3933,2.2199,3.748,42274.5,5995.5,5976.97,42197.79,19406.83,19.11,97.702
5,2025-06-15,105591.8,3464.22,36.373,4.786,1226.25,1043.0,2506.05,1997.03,2627.0,...,2.4339,2.2592,3.67,42202.0,5984.3,6045.26,42967.62,19662.48,20.82,98.176
6,2025-06-14,105421.5,3452.8,36.355,4.8145,1211.9,1046.3,2521.7,1996.53,2644.75,...,2.3587,2.2276,3.581,42160.1,5975.5,6022.24,42865.77,19615.88,18.02,97.901
7,2025-06-13,106080.3,3402.4,36.295,4.8355,1275.1,1065.9,2517.95,1989.4,2654.6,...,2.1887,2.1429,3.492,42760.5,6011.1,6038.81,42866.87,19714.99,17.26,98.604
8,2025-06-12,105703.0,3343.7,36.261,4.8145,1259.6,1092.6,2492.35,1986.98,2657.3,...,2.2053,2.1668,3.507,42814.0,6014.3,6005.88,42761.76,19591.24,16.95,99.07
9,2025-06-11,108661.2,3343.4,36.642,4.9,1212.5,1078.8,2480.65,1992.28,2652.5,...,2.1416,2.088,3.533,42852.5,6037.1,6000.36,42762.87,19529.95,17.16,98.442


In [44]:
# Date hariç tüm sütunları işle
cols_to_convert = df.columns.drop('Date')

for col in cols_to_convert:
    df[col] = df[col].astype(str).str.replace(',', '').astype(float)

In [45]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4748 entries, 0 to 4747
Data columns (total 24 columns):
 #   Column         Non-Null Count  Dtype   
---  ------         --------------  -----   
 0   Date           4748 non-null   category
 1   Bitcoin        4748 non-null   float64 
 2   Gold           4748 non-null   float64 
 3   Silver         4748 non-null   float64 
 4   Copper         4748 non-null   float64 
 5   Platinum       4748 non-null   float64 
 6   Palladium      4748 non-null   float64 
 7   Aluminium      4748 non-null   float64 
 8   Lead           4748 non-null   float64 
 9   Zinc           4748 non-null   float64 
 10  Nickel         4748 non-null   float64 
 11  Tin            4748 non-null   float64 
 12  BrentOil       4748 non-null   float64 
 13  CrudeOil       4748 non-null   float64 
 14  HeatingOil     4748 non-null   float64 
 15  Gasoline       4748 non-null   float64 
 16  NaturalGas     4748 non-null   float64 
 17  US30           4748 non-null   fl

In [46]:
df.head(10)

Unnamed: 0,Date,Bitcoin,Gold,Silver,Copper,Platinum,Palladium,Aluminium,Lead,Zinc,...,HeatingOil,Gasoline,NaturalGas,US30,US500,S&P500,DowJones,NASDAQ,CBOE,USDollarIndex
0,2025-06-20,103280.5,3385.7,36.017,4.8335,1264.5,1054.9,2556.5,1992.63,2640.75,...,2.5418,2.3295,3.847,42254.3,5973.8,5967.84,42206.82,19447.41,20.62,98.282
1,2025-06-19,104669.6,3384.97,36.368,4.8135,1295.85,1052.75,2527.25,1993.73,2645.0,...,2.5959,2.3668,4.086,42018.0,5962.9,5980.87,42171.66,19546.27,22.17,98.468
2,2025-06-18,104894.2,3408.1,36.913,4.853,1313.1,1059.8,2545.0,1995.68,2640.8,...,2.5334,2.3086,3.989,42171.0,5980.5,5982.72,42215.8,19521.09,20.14,98.468
3,2025-06-17,104559.8,3406.9,37.151,4.809,1261.7,1059.4,2547.7,1981.68,2639.25,...,2.5051,2.2719,3.851,42112.0,5968.8,6033.11,42515.09,19701.21,21.6,98.394
4,2025-06-16,106740.4,3417.3,36.448,4.836,1253.2,1037.8,2518.4,2009.88,2661.4,...,2.3933,2.2199,3.748,42274.5,5995.5,5976.97,42197.79,19406.83,19.11,97.702
5,2025-06-15,105591.8,3464.22,36.373,4.786,1226.25,1043.0,2506.05,1997.03,2627.0,...,2.4339,2.2592,3.67,42202.0,5984.3,6045.26,42967.62,19662.48,20.82,98.176
6,2025-06-14,105421.5,3452.8,36.355,4.8145,1211.9,1046.3,2521.7,1996.53,2644.75,...,2.3587,2.2276,3.581,42160.1,5975.5,6022.24,42865.77,19615.88,18.02,97.901
7,2025-06-13,106080.3,3402.4,36.295,4.8355,1275.1,1065.9,2517.95,1989.4,2654.6,...,2.1887,2.1429,3.492,42760.5,6011.1,6038.81,42866.87,19714.99,17.26,98.604
8,2025-06-12,105703.0,3343.7,36.261,4.8145,1259.6,1092.6,2492.35,1986.98,2657.3,...,2.2053,2.1668,3.507,42814.0,6014.3,6005.88,42761.76,19591.24,16.95,99.07
9,2025-06-11,108661.2,3343.4,36.642,4.9,1212.5,1078.8,2480.65,1992.28,2652.5,...,2.1416,2.088,3.533,42852.5,6037.1,6000.36,42762.87,19529.95,17.16,98.442


In [47]:
df['Date'] = pd.to_datetime(df['Date'], errors='coerce', dayfirst=True)
df['Date'] = df['Date'].astype('category')

In [66]:
df.head(10)

Unnamed: 0,Date,Bitcoin,Gold,Silver,Copper,Platinum,Palladium,Aluminium,Lead,Zinc,...,HeatingOil,Gasoline,NaturalGas,US30,US500,S&P500,DowJones,NASDAQ,CBOE,USDollarIndex
0,2012-06-21,6.7,1615.8,28.389,3.393,1457.8,618.35,1840.25,1816.75,1819.5,...,2.5846,2.5902,2.517,12824.4,1355.7,1355.7,12824.39,2930.45,17.24,81.734
1,2012-06-22,6.6,1615.8,28.389,3.393,1457.8,618.35,1840.25,1816.75,1819.5,...,2.5846,2.5902,2.517,12824.4,1355.7,1355.7,12824.39,2930.45,17.24,81.734
2,2012-06-23,6.4,1615.8,28.389,3.393,1457.8,618.35,1840.25,1816.75,1819.5,...,2.5846,2.5902,2.517,12824.4,1355.7,1355.7,12824.39,2930.45,17.24,81.734
3,2012-06-24,6.3,1615.8,28.389,3.393,1457.8,618.35,1840.25,1816.75,1819.5,...,2.5846,2.5902,2.517,12824.4,1355.7,1355.7,12824.39,2930.45,17.24,81.734
4,2012-06-25,6.3,1615.8,28.389,3.393,1457.8,618.35,1840.25,1816.75,1819.5,...,2.5846,2.5902,2.517,12824.4,1355.7,1355.7,12824.39,2930.45,17.24,81.734
5,2012-06-26,6.4,1615.8,28.389,3.393,1457.8,618.35,1840.25,1816.75,1819.5,...,2.5846,2.5902,2.517,12824.4,1355.7,1355.7,12824.39,2930.45,17.24,81.734
6,2012-06-27,6.7,1615.8,28.389,3.393,1457.8,618.35,1840.25,1816.75,1819.5,...,2.5846,2.5902,2.517,12824.4,1355.7,1355.7,12824.39,2930.45,17.24,81.734
7,2012-06-28,6.6,1615.8,28.389,3.393,1457.8,618.35,1840.25,1816.75,1819.5,...,2.5846,2.5902,2.517,12824.4,1355.7,1355.7,12824.39,2930.45,17.24,81.734
8,2012-06-29,6.7,1615.8,28.389,3.393,1457.8,618.35,1840.25,1816.75,1819.5,...,2.5846,2.5902,2.517,12824.4,1355.7,1355.7,12824.39,2930.45,17.24,81.734
9,2012-06-30,6.7,1615.8,28.389,3.393,1457.8,618.35,1840.25,1816.75,1819.5,...,2.5846,2.5902,2.517,12824.4,1355.7,1355.7,12824.39,2930.45,17.24,81.734


In [67]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4748 entries, 0 to 4747
Data columns (total 24 columns):
 #   Column         Non-Null Count  Dtype   
---  ------         --------------  -----   
 0   Date           4748 non-null   category
 1   Bitcoin        4748 non-null   float64 
 2   Gold           4748 non-null   float64 
 3   Silver         4748 non-null   float64 
 4   Copper         4748 non-null   float64 
 5   Platinum       4748 non-null   float64 
 6   Palladium      4748 non-null   float64 
 7   Aluminium      4748 non-null   float64 
 8   Lead           4748 non-null   float64 
 9   Zinc           4748 non-null   float64 
 10  Nickel         4748 non-null   float64 
 11  Tin            4748 non-null   float64 
 12  BrentOil       4748 non-null   float64 
 13  CrudeOil       4748 non-null   float64 
 14  HeatingOil     4748 non-null   float64 
 15  Gasoline       4748 non-null   float64 
 16  NaturalGas     4748 non-null   float64 
 17  US30           4748 non-null   fl

In [50]:
# Önce string olan tarihleri datetime nesnesine çevir
df['Date'] = pd.to_datetime(df['Date'])

# Şimdi 'gün/ay/yıl' formatına dönüştür (string olarak)
df['Date'] = df['Date'].dt.strftime('%d/%m/%Y')

In [53]:
df['Date'] = pd.to_datetime(df['Date'], errors='coerce', dayfirst=True)
df['Date'] = df['Date'].astype('category')

In [57]:
df.to_csv('index.csv', index=False)  # index'i kaydetme

In [58]:
df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)  # dayfirst=True ile gün/ay/yıl olarak parse eder

In [59]:
df = df.sort_values('Date', ascending=True)  # ascending=True ile en eski tarih en üste gelir

In [62]:
df = df.reset_index(drop=True)  # Eski indeksi tamamen sil, yeni indeks 0'dan başlasın

In [65]:
df['Date'] = pd.to_datetime(df['Date'], dayfirst=True).sort_values().astype('category')

In [68]:
df.to_csv('index.csv', index=False)  # index'i kaydetme