In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from imblearn.over_sampling import SMOTE
from sklearn.metrics import classification_report, accuracy_score
import xgboost as xgb

# 1. Load the raw building-structure table (the cleaning itself happens in the
#    following cell).
data = pd.read_csv(
    "/content/sample_data/csv_building_structure.csv"  # file that holds the data
)

# Prediction targets.
target_damage = 'damage_grade'  # damage grade target
target_rehabilitation = 'technical_solution_proposed'  # rehabilitation target

# Model inputs.
# An earlier draft of this cell also listed "latitude", "longitude" and "age"
# among the numeric inputs and "superstructure_type" among the categorical
# ones; they are not part of the current feature set.
numerical_columns = [
    "age_building",
    "plinth_area_sq_ft",
    "height_ft_pre_eq",
]
categorical_columns = [
    "land_surface_condition",
    "foundation_type",
    "ground_floor_type",
    "roof_type",
    "plan_configuration",
    "position",
]

FileNotFoundError: [Errno 2] No such file or directory: 'data/csv_building_structure.csv'

In [None]:
# Keep only the model inputs and the two targets. The explicit .copy() gives an
# independent frame, so the column assignments below never write into a view of
# the raw table (avoids SettingWithCopyWarning).
selected_columns = numerical_columns + categorical_columns + [target_damage, target_rehabilitation]
data = data[selected_columns].copy()

# Rows without a label cannot be used for supervised training. Filling them
# with a placeholder would invent an artificial extra class, so drop them.
data = data.dropna(subset=[target_damage, target_rehabilitation])

# Categorical inputs: normalise case/whitespace, then mark missing values with
# an explicit 'unknown' category. The fill is restricted to these columns on
# purpose: a frame-wide fillna('unknown') would also put strings into the
# numeric columns and the targets.
data[categorical_columns] = (
    data[categorical_columns]
    .apply(lambda col: col.str.lower().str.strip())
    .fillna('unknown')
)

# Numeric inputs: force a numeric dtype (unparseable entries become NaN) and
# impute gaps with the column median so the columns stay numeric for the
# resampling and model-fitting steps.
data[numerical_columns] = data[numerical_columns].apply(pd.to_numeric, errors="coerce")
data[numerical_columns] = data[numerical_columns].fillna(data[numerical_columns].median())

# 2. Input feature names (targets are encoded in a later cell).
features = numerical_columns + categorical_columns

# One-hot encode the categorical inputs.
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
encoded_features = encoder.fit_transform(data[categorical_columns])
onehot_columns = encoder.get_feature_names_out(categorical_columns)

# Combine numeric and one-hot columns side by side. The numeric part gets a
# fresh 0..n-1 index so it lines up positionally with the encoder output.
final_data = pd.concat(
    [
        data[numerical_columns].reset_index(drop=True),
        pd.DataFrame(encoded_features, columns=onehot_columns),
    ],
    axis=1,
)

# Sanity check: concatenation must not add or lose rows.
assert len(final_data) == len(data), "feature matrix and cleaned data are misaligned"

In [None]:
# 3. Convert the target variables to integer codes.
# One encoder per target: refitting a single encoder would discard the
# damage-grade mapping and make inverse_transform impossible for it.
damage_label_encoder = LabelEncoder()
Y_damage = damage_label_encoder.fit_transform(data[target_damage])

rehab_label_encoder = LabelEncoder()
Y_rehabilitation = rehab_label_encoder.fit_transform(data[target_rehabilitation])

# Backward-compatible name: the original single encoder ended up fitted on the
# rehabilitation target.
label_encoder = rehab_label_encoder

# 4. Train/test split FIRST, stratified so both parts keep the class
# proportions. Splitting before resampling keeps the test set made of real,
# untouched observations.
X_train_damage_raw, X_test_damage, Y_train_damage_raw, Y_test_damage = train_test_split(
    final_data, Y_damage, test_size=0.33, random_state=42, stratify=Y_damage
)
X_train_rehab_raw, X_test_rehab, Y_train_rehab_raw, Y_test_rehab = train_test_split(
    final_data, Y_rehabilitation, test_size=0.33, random_state=42, stratify=Y_rehabilitation
)

# 5. Balance the classes with SMOTE on the TRAINING portion only.
# Resampling before the split would leak information: synthetic points are
# interpolated between neighbours, so test-side rows would shape the training
# data and the test set itself would contain synthetic rows.
# NOTE: SMOTE needs more samples per class in the training portion than its
# k_neighbors setting; very rare classes will make fit_resample raise.
smote = SMOTE(random_state=42)
X_damage_balanced, Y_damage_balanced = smote.fit_resample(X_train_damage_raw, Y_train_damage_raw)
X_rehabilitation_balanced, Y_rehabilitation_balanced = smote.fit_resample(
    X_train_rehab_raw, Y_train_rehab_raw
)

# The balanced training sets are what the models are fitted on.
X_train_damage, Y_train_damage = X_damage_balanced, Y_damage_balanced
X_train_rehab, Y_train_rehab = X_rehabilitation_balanced, Y_rehabilitation_balanced

In [None]:
# 6. Prediction with XGBoost
def _train_and_report(X_train, X_test, y_train, y_test, title):
    """Fit an XGBoost classifier, print its test-set report, and return it.

    The learning objective is chosen from the number of distinct labels in
    ``y_train`` instead of being hard-coded, so the same routine is correct
    for both two-class and multi-class targets.

    Parameters
    ----------
    X_train, X_test : feature matrices for fitting and evaluation.
    y_train, y_test : integer-encoded labels matching the matrices above.
    title : heading printed before the report.

    Returns
    -------
    (model, predictions) : the fitted ``XGBClassifier`` and its predicted
        labels for ``X_test``.
    """
    n_classes = len(np.unique(y_train))
    if n_classes > 2:
        objective_params = {"objective": "multi:softmax", "num_class": n_classes}
    else:
        objective_params = {"objective": "binary:logistic"}

    model = xgb.XGBClassifier(random_state=42, **objective_params)
    model.fit(X_train, y_train)

    predictions = model.predict(X_test)
    print(title)
    print(classification_report(y_test, predictions))
    print("Doğruluk:", accuracy_score(y_test, predictions))
    return model, predictions


# Damage grade prediction
xgb_damage, Y_pred_damage = _train_and_report(
    X_train_damage, X_test_damage, Y_train_damage, Y_test_damage,
    "Hasar Derecesi Tahmini:",
)

# Rehabilitation intervention prediction
xgb_rehab, Y_pred_rehab = _train_and_report(
    X_train_rehab, X_test_rehab, Y_train_rehab, Y_test_rehab,
    "Rehabilitasyon Müdahalesi Tahmini:",
)