In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.multiclass import OneVsRestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler

In [None]:
# Load the dataset from a CSV file given by a relative path.
DATA_PATH = 'hardness_group.csv'
df_dropna = pd.read_csv(DATA_PATH)

In [None]:
# Replace the Origin labels with integer codes.
mapping = {'river': 0, 'lake': 1, 'ocean': 2}
df_dropna['Origin'] = df_dropna['Origin'].replace(mapping)

# 'Unnamed: 0' is presumably an index column written out by an earlier
# to_csv call -- TODO confirm. errors='ignore' keeps this cell re-runnable:
# df_dropna is overwritten here, so on a second run the column is already
# gone and a plain drop() would raise KeyError.
df_dropna = df_dropna.drop(columns=['Unnamed: 0'], errors='ignore')
# Discard every row that still contains a missing value in any column.
df_dropna = df_dropna.dropna(axis=0)

In [None]:
# Preview the first five rows of the frame.
df_dropna.head(n=5)

Unnamed: 0,Origin,Solids,Chloramines,Sulfate,Conductivity,Organic_carbon,Trihalomethanes,Turbidity,Hardness,ph,Survivability
0,0.0,20791.31898,7.300212,368.516441,564.308654,10.379783,8.954468,2.963135,205.0,7.112959,0
1,0.0,18630.05786,6.635246,349.748446,592.885359,15.180013,4.048808,4.500656,129.0,3.71608,0
2,1.0,19909.54173,9.275884,323.876566,418.606213,16.868637,4.210943,3.055934,224.0,8.099124,0
3,1.0,22018.41744,8.059332,356.886136,363.266516,18.436525,9.236995,4.628771,214.0,8.316766,0
4,1.0,17978.98634,6.5466,310.135738,398.410813,11.558279,3.496447,4.075075,181.0,9.092223,0


In [None]:
# Separate the target column from the remaining feature columns.
target_col = 'Survivability'
y_DN = df_dropna[target_col]                 # target data
X_DN = df_dropna.drop(columns=[target_col])  # feature data

# Hold out 20% of the rows for testing (train : test = 8 : 2) with a fixed seed.
X_train_DN, X_test_DN, y_train_DN, y_test_DN = train_test_split(
    X_DN,
    y_DN,
    test_size=0.2,
    random_state=42,
)

Linear Regression

In [None]:
# Ordinary least-squares regression fitted on the training split.
# predict() returns continuous scores here, not class labels.
LRmodel = LinearRegression(fit_intercept=True).fit(X_train_DN, y_train_DN)
y_pred_DN = LRmodel.predict(X_test_DN)

In [None]:
# Model evaluation
# Convert the regression scores into 0/1 labels using a 0.5 cut-off.
y_pred_DN = (y_pred_DN > 0.5).astype(int)


print("Accuracy:", accuracy_score(y_test_DN, y_pred_DN))
print("Linear Regression Classification Report:")
# zero_division=0 reports 0.00 for a class that is never predicted (the same
# value the default prints) without emitting UndefinedMetricWarning.
print(classification_report(y_test_DN, y_pred_DN, zero_division=0))

Accuracy: 0.6493108728943339
Linear Regression Classification Report:
              precision    recall  f1-score   support

           0       0.65      1.00      0.79       424
           1       0.00      0.00      0.00       229

    accuracy                           0.65       653
   macro avg       0.32      0.50      0.39       653
weighted avg       0.42      0.65      0.51       653



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Logistic Regression

In [None]:
# Standardise the features: the scaler is fitted on the training split only
# and the same fitted statistics are then applied to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_DN)
X_test_scaled = scaler.transform(X_test_DN)


# Logistic regression wrapped in a one-vs-rest meta-classifier.
Logistic_model = OneVsRestClassifier(LogisticRegression())
Logistic_model.fit(X_train_scaled, y_train_DN)
y_pred_DN_logi = Logistic_model.predict(X_test_scaled)


print("Accuracy:", accuracy_score(y_test_DN, y_pred_DN_logi))
print("Logistic Regression Classification Report:")
# zero_division=0 reports 0.00 for a class that is never predicted (the same
# value the default prints) without emitting UndefinedMetricWarning.
print(classification_report(y_test_DN, y_pred_DN_logi, zero_division=0))

Accuracy: 0.6477794793261868
Logistic Regression Classification Report:
              precision    recall  f1-score   support

           0       0.65      1.00      0.79       424
           1       0.00      0.00      0.00       229

    accuracy                           0.65       653
   macro avg       0.32      0.50      0.39       653
weighted avg       0.42      0.65      0.51       653



In [None]:
# Plain logistic regression with a fixed seed and a raised iteration limit.
logreg = LogisticRegression(random_state=42, solver='lbfgs', max_iter=1000)

logreg.fit(X_train_scaled, y_train_DN.astype(int))  # make sure y_train is integer-typed

y_pred_lr = logreg.predict(X_test_scaled)

print("Accuracy:", accuracy_score(y_test_DN.astype(int), y_pred_lr))
# zero_division=0 reports 0.00 for a class that is never predicted (the same
# value the default prints) without emitting UndefinedMetricWarning.
print("Classification Report:\n", classification_report(y_test_DN.astype(int), y_pred_lr, zero_division=0))

Accuracy: 0.6477794793261868
Classification Report:
               precision    recall  f1-score   support

           0       0.65      1.00      0.79       424
           1       0.00      0.00      0.00       229

    accuracy                           0.65       653
   macro avg       0.32      0.50      0.39       653
weighted avg       0.42      0.65      0.51       653



Neural Network

In [None]:
# Neural network: multi-layer perceptron with two hidden layers of 100 units,
# ReLU activation, Adam solver, and a fixed seed.
mlp = MLPClassifier(
    hidden_layer_sizes=(100, 100),
    activation='relu',
    solver='adam',
    max_iter=1000,
    random_state=42,
)
mlp.fit(X_train_scaled, y_train_DN.astype(int))

# Predict on the scaled test split and score against the integer-cast labels.
y_pred_NN = mlp.predict(X_test_scaled)
y_test_NN_int = y_test_DN.astype(int)
nn_accuracy = accuracy_score(y_test_NN_int, y_pred_NN)
nn_report = classification_report(y_test_NN_int, y_pred_NN)

print("Accuracy:", nn_accuracy)
print("Classification Report:\n", nn_report)

Accuracy: 0.6569678407350689
Classification Report:
               precision    recall  f1-score   support

           0       0.74      0.72      0.73       424
           1       0.51      0.54      0.53       229

    accuracy                           0.66       653
   macro avg       0.63      0.63      0.63       653
weighted avg       0.66      0.66      0.66       653



Random Forest

In [None]:
# Random forest classifier: 100 trees with a fixed seed, trained on the same
# scaled features as the other classifiers. RandomForestClassifier and
# accuracy_score come from the notebook's top import cell.
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train_scaled, y_train_DN.astype(int))


y_pred_RF = rf_classifier.predict(X_test_scaled)

print("Accuracy:", accuracy_score(y_test_DN.astype(int), y_pred_RF))
print("Classification Report:\n", classification_report(y_test_DN.astype(int), y_pred_RF))

Accuracy: 0.6906584992343032
Classification Report:
               precision    recall  f1-score   support

           0       0.72      0.86      0.78       424
           1       0.59      0.38      0.46       229

    accuracy                           0.69       653
   macro avg       0.66      0.62      0.62       653
weighted avg       0.67      0.69      0.67       653

