# Model Training and Validation

Libraries and Modules

In [None]:
import pandas as pd
from sklearn.model_selection import cross_val_score, train_test_split, StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import joblib

The Random Forest model is trained and tested on extracted features stored in a .csv file. It is validated with k-fold cross-validation, and the trained model is saved to a .pkl file.

In [None]:
import pandas as pd
from sklearn.model_selection import cross_val_score, train_test_split, StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import joblib

# Read the pre-extracted feature table from disk
features_path = 'lbp_and_glcm_features.csv'
df = pd.read_csv(features_path)

# The 'Defect' column holds the target labels; every other column is a feature
y = df['Defect']
X = df.drop(columns='Defect')

# Random Forest with 100 trees; the fixed seed keeps runs reproducible
clf = RandomForestClassifier(random_state=42, n_estimators=100)

# Estimate generalization with 5-fold cross-validation.
# cross_val_score fits clones of clf, so clf itself is still unfitted afterwards
# and nothing is saved by this step.
cv_scores = cross_val_score(clf, X, y, cv=5)
print(f"Cross-Validation Mean Accuracy: {cv_scores.mean():.2f} (+/- {cv_scores.std() * 2:.2f})")

# Hold out 20% of the rows BEFORE any fitting so the test accuracy is measured
# on data the model has never seen. (Fitting on the full dataset first and
# then scoring on a subset of it would report an inflated accuracy.)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
clf.fit(X_train, y_train)
accuracy = clf.score(X_test, y_test)
print(f"Accuracy on Test Data: {accuracy:.2f}")

# Refit on the full dataset to produce the final model; fit() discards the
# previous trees, so this is a clean retrain on all available rows.
clf.fit(X, y)

# Save the final trained model to a file
joblib.dump(clf, 'random_forest_model.pkl')

# Generate a classification report for each fold of a shuffled, stratified
# 5-fold split (seeded so the folds are reproducible).
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
for fold, (train_idx, test_idx) in enumerate(skf.split(X, y)):
    X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
    X_test, y_test = X.iloc[test_idx], y.iloc[test_idx]

    # Fit a fresh estimator with the same hyperparameters instead of refitting
    # clf in place: refitting would leave clf trained on only one fold's
    # training subset, no longer matching the model trained on the full data.
    fold_clf = RandomForestClassifier(**clf.get_params())
    fold_clf.fit(X_train, y_train)
    y_pred = fold_clf.predict(X_test)

    report = classification_report(y_test, y_pred)
    print(f"Classification Report for Fold {fold + 1}:\n{report}\n")
