In [1]:
import pandas as pd
from sklearn import svm
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, cross_validate
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Load and prepare data
data = pd.read_excel('model_updated.xlsx')
X = data[['temp_sensor', 'tds_sensor', 'ph_sensor', 'turbidity_sensor']]
y = data['fit_or_unfit']

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define models
models = {
    'SVM': svm.SVC(class_weight='balanced'),
    'Decision Tree': DecisionTreeClassifier(),
    'Random Forest': RandomForestClassifier(),
    'Logistic Regression': LogisticRegression(max_iter=200)
}

# Define scoring
scoring = ['accuracy', 'precision', 'recall', 'f1']

# Store final results
final_results = {}

# Evaluate each model
for name, model in models.items():
    print(f"\n====== {name} ======")
    model.fit(X_train_scaled, y_train)

    # Training evaluation
    y_train_pred = model.predict(X_train_scaled)
    train_accuracy = accuracy_score(y_train, y_train_pred)
    train_precision = precision_score(y_train, y_train_pred)
    train_recall = recall_score(y_train, y_train_pred)
    train_f1 = f1_score(y_train, y_train_pred)

    # Testing evaluation
    y_test_pred = model.predict(X_test_scaled)
    test_accuracy = accuracy_score(y_test, y_test_pred)
    test_precision = precision_score(y_test, y_test_pred)
    test_recall = recall_score(y_test, y_test_pred)
    test_f1 = f1_score(y_test, y_test_pred)

    # Cross-validation on training set
    cv_scores = cross_validate(model, X_train_scaled, y_train, cv=5, scoring=scoring)
    cv_accuracy = cv_scores['test_accuracy'].mean()
    cv_precision = cv_scores['test_precision'].mean()
    cv_recall = cv_scores['test_recall'].mean()
    cv_f1 = cv_scores['test_f1'].mean()

    # Save all results
    final_results[name] = {
        'Train': (train_accuracy, train_precision, train_recall, train_f1),
        'Test': (test_accuracy, test_precision, test_recall, test_f1),
        'Cross-Val': (cv_accuracy, cv_precision, cv_recall, cv_f1)
    }

# Display results in table format
print("\n========== Summary Table ==========\n")
print(f"{'Model':<20}{'Set':<12}{'Acc':<8}{'Prec':<8}{'Recall':<8}{'F1':<8}")
print("-" * 60)
for model_name, results in final_results.items():
    for eval_type, metrics in results.items():
        acc, prec, rec, f1 = [round(m, 2) for m in metrics]
        print(f"{model_name:<20}{eval_type:<12}{acc:<8}{prec:<8}{rec:<8}{f1:<8}")







Model               Set         Acc     Prec    Recall  F1      
------------------------------------------------------------
SVM                 Train       0.99    0.98    1.0     0.99    
SVM                 Test        0.98    0.97    1.0     0.98    
SVM                 Cross-Val   0.99    0.98    1.0     0.99    
Decision Tree       Train       1.0     1.0     1.0     1.0     
Decision Tree       Test        1.0     1.0     1.0     1.0     
Decision Tree       Cross-Val   1.0     1.0     1.0     1.0     
Random Forest       Train       1.0     1.0     1.0     1.0     
Random Forest       Test        1.0     1.0     1.0     1.0     
Random Forest       Cross-Val   1.0     1.0     1.0     1.0     
Logistic Regression Train       0.89    0.87    0.95    0.91    
Logistic Regression Test        0.88    0.87    0.93    0.9     
Logistic Regression Cross-Val   0.89    0.87    0.95    0.91    


In [11]:
import pandas as pd
from sklearn import svm
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, cross_validate
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Step 1: Load both datasets
main_data = pd.read_excel('model_updated.xlsx')
new_data = pd.read_csv('sensor_data_classified_rawan_modified.csv')

# Step 2: Rename columns in new_data if needed to match main_data
new_data = new_data.rename(columns={
    'temp': 'temp_sensor',
    'tds': 'tds_sensor',
    'ph': 'ph_sensor',
    'turbidity': 'turbidity_sensor',
    'classification': 'fit_or_unfit'  
})

# Step 3: Ensure both datasets have the same columns
main_data = main_data[['temp_sensor', 'tds_sensor', 'ph_sensor', 'turbidity_sensor', 'fit_or_unfit']]
new_data = new_data[['temp_sensor', 'tds_sensor', 'ph_sensor', 'turbidity_sensor', 'fit_or_unfit']]

# Step 4: Merge and shuffle
combined_data = pd.concat([main_data, new_data], ignore_index=True)
combined_data = combined_data.sample(frac=1).reset_index(drop=True)

# Optional: Save combined dataset
combined_data.to_excel('model_combined.xlsx', index=False)

# Step 5: Prepare features and labels
X = combined_data[['temp_sensor', 'tds_sensor', 'ph_sensor', 'turbidity_sensor']]
y = combined_data['fit_or_unfit']

# Step 6: Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 7: Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 8: Define models
models = {
    'SVM': svm.SVC(class_weight='balanced'),
    'Decision Tree': DecisionTreeClassifier(),
    'Random Forest': RandomForestClassifier(),
    'Logistic Regression': LogisticRegression(max_iter=200)
}

# Step 9: Evaluation setup
scoring = ['accuracy', 'precision', 'recall', 'f1']
final_results = {}

# Step 10: Train, test, and cross-validate each model
for name, model in models.items():
    print(f"\n====== {name} ======")
    model.fit(X_train_scaled, y_train)

    # Train set evaluation
    y_train_pred = model.predict(X_train_scaled)
    train_metrics = (
        accuracy_score(y_train, y_train_pred),
        precision_score(y_train, y_train_pred),
        recall_score(y_train, y_train_pred),
        f1_score(y_train, y_train_pred)
    )

    # Test set evaluation
    y_test_pred = model.predict(X_test_scaled)
    test_metrics = (
        accuracy_score(y_test, y_test_pred),
        precision_score(y_test, y_test_pred),
        recall_score(y_test, y_test_pred),
        f1_score(y_test, y_test_pred)
    )

    # Cross-validation on training set
    cv_scores = cross_validate(model, X_train_scaled, y_train, cv=5, scoring=scoring)
    cv_metrics = (
        cv_scores['test_accuracy'].mean(),
        cv_scores['test_precision'].mean(),
        cv_scores['test_recall'].mean(),
        cv_scores['test_f1'].mean()
    )

    final_results[name] = {
        'Train': train_metrics,
        'Test': test_metrics,
        'Cross-Val': cv_metrics
    }

# Step 11: Print Summary Table
print("\n========== Summary Table ==========\n")
print(f"{'Model':<20}{'Set':<12}{'Acc':<8}{'Prec':<8}{'Recall':<8}{'F1':<8}")
print("-" * 60)
for model_name, results in final_results.items():
    for eval_type, metrics in results.items():
        acc, prec, rec, f1 = [round(m, 2) for m in metrics]
        print(f"{model_name:<20}{eval_type:<12}{acc:<8}{prec:<8}{rec:<8}{f1:<8}")







Model               Set         Acc     Prec    Recall  F1      
------------------------------------------------------------
SVM                 Train       0.97    0.96    0.94    0.95    
SVM                 Test        0.97    0.97    0.94    0.95    
SVM                 Cross-Val   0.97    0.96    0.94    0.95    
Decision Tree       Train       1.0     1.0     1.0     1.0     
Decision Tree       Test        0.95    0.91    0.93    0.92    
Decision Tree       Cross-Val   0.95    0.93    0.93    0.93    
Random Forest       Train       1.0     1.0     1.0     1.0     
Random Forest       Test        0.97    0.97    0.93    0.95    
Random Forest       Cross-Val   0.97    0.99    0.93    0.96    
Logistic Regression Train       0.87    0.8     0.79    0.8     
Logistic Regression Test        0.86    0.82    0.77    0.8     
Logistic Regression Cross-Val   0.87    0.8     0.79    0.79    
