In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix


# ---------------------------------------------------------------------------
# Load
# ---------------------------------------------------------------------------
# NOTE(review): absolute, machine-specific path -- the notebook only runs where
# this exact file exists. Consider a configurable data directory.
file_path = r"C:\Users\wproj\Downloads\Datasets\csv_output\dataset_xml_format.csv"

# Keep the untouched frame around: the model pipeline below must start from
# the raw values so that imputation statistics can be learned on the training
# split only.
raw_data = pd.read_csv(file_path)

print("Initial Data:")
print(raw_data.head())

# ---------------------------------------------------------------------------
# Exploratory view (display only)
# ---------------------------------------------------------------------------
# `data` is a copy imputed with whole-dataset statistics. That is fine for
# eyeballing distributions, but it is NOT used to build the model inputs.
data = raw_data.copy()

# 'number' covers every numeric dtype (ints as well as floats), so integer
# columns are plotted with the other numeric features instead of being
# grouped with the text columns.
numeric_cols = data.select_dtypes(include='number').columns
non_numeric_cols = data.select_dtypes(exclude='number').columns

data[numeric_cols] = data[numeric_cols].fillna(data[numeric_cols].mean())
data[non_numeric_cols] = data[non_numeric_cols].fillna("Unknown")

print("\nMissing Values After Filling:")
print(data.isnull().sum())

# Nothing to plot when the file has no numeric columns.
if len(numeric_cols) > 0:
    sns.pairplot(data[numeric_cols])
    plt.show()

# ---------------------------------------------------------------------------
# Target / features
# ---------------------------------------------------------------------------
# Placeholder: must be replaced with the real label column of the CSV.
target_column = 'target_column_name'

# Fail with an actionable message (same exception type DataFrame.drop would
# raise) instead of an opaque KeyError further down.
if target_column not in raw_data.columns:
    raise KeyError(
        f"target_column={target_column!r} is not a column of the dataset; "
        f"set it to one of: {list(raw_data.columns)}"
    )

# Rows without a label cannot be used for supervised learning. Dropping them
# avoids inventing labels (a column mean, or an artificial "Unknown" class).
labelled = raw_data.dropna(subset=[target_column])

X = labelled.drop(columns=[target_column])
y = labelled[target_column]

# A constant fill uses no dataset statistics, so it is safe to apply before
# the split; it also keeps "missing" as an explicit category for the encoder.
categorical_cols = X.select_dtypes(exclude='number').columns
X[categorical_cols] = X[categorical_cols].fillna("Unknown")

# NOTE(review): identifier-like text columns (e.g. file names / paths) are
# one-hot encoded here too, which can create one column per row -- consider
# dropping such columns before encoding.
X = pd.get_dummies(X, drop_first=True)

# ---------------------------------------------------------------------------
# Split, then impute / scale using training statistics only
# ---------------------------------------------------------------------------
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Means are learned from the training rows and reused for the test rows so no
# information from the held-out set leaks into the features.
train_means = X_train.mean(numeric_only=True)
X_train = X_train.fillna(train_means)
X_test = X_test.fillna(train_means)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ---------------------------------------------------------------------------
# Model
# ---------------------------------------------------------------------------
model = RandomForestClassifier(random_state=42)
model.fit(X_train_scaled, y_train)

y_pred = model.predict(X_test_scaled)

# ---------------------------------------------------------------------------
# Evaluation
# ---------------------------------------------------------------------------
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# Importances are reported per encoded feature column, most important first.
feature_importances = model.feature_importances_
features = X.columns
feature_imp_df = pd.DataFrame({'Feature': features, 'Importance': feature_importances})
feature_imp_df = feature_imp_df.sort_values(by='Importance', ascending=False)

print("\nFeature Importance:")
print(feature_imp_df)


Initial Data:
        filename                                          file_path file_type  \
0  foto00088.png  C:\Users\wproj\Downloads\New folder (8)\datase...       png   
1  foto00088.xml  C:\Users\wproj\Downloads\New folder (8)\datase...       xml   
2  foto00117.png  C:\Users\wproj\Downloads\New folder (8)\datase...       png   
3  foto00117.xml  C:\Users\wproj\Downloads\New folder (8)\datase...       xml   
4  foto00204.png  C:\Users\wproj\Downloads\New folder (8)\datase...       png   

  annotation       folder                                               path  \
0        NaN          NaN                                                NaN   
1       \n\t  video.cikti  C:\Users\John Doe.DESKTOP-QVG68SG\Desktop\vide...   
2        NaN          NaN                                                NaN   
3       \n\t  video.cikti  C:\Users\John Doe.DESKTOP-QVG68SG\Desktop\vide...   
4        NaN          NaN                                                NaN   

   source database