In [None]:

import os

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
 
# Assuming the data does not have a header and is comma-separated
# You might need to adjust the path to where your file is located
data_path = "C:\\Users\\latha\\Downloads\\Assignment\\2024 Assignment\\2004 - AI Exp\\breast+cancer+wisconsin+diagnostic\\wdbc.data"
 
# Define column names manually based on the dataset description
column_names = ['ID', 'Diagnosis'] + [f'feature_{i}' for i in range(1, 31)]
 
# Load the dataset
df = pd.read_csv(data_path, header=None, names=column_names)
 
# Map diagnosis to a binary variable
df['Diagnosis'] = df['Diagnosis'].map({'M': 1, 'B': 0})
 
# Display the first few rows of the dataframe
print(df.head())
 



In [None]:

# Distribution of target variable
plt.figure(figsize=(6, 4))
sns.countplot(x='Diagnosis', data=df)
plt.title('Distribution of Target Classes')
plt.show()
 
# Correlation heatmap
plt.figure(figsize=(10, 10))
sns.heatmap(df.corr(), annot=False, cmap='coolwarm')
plt.title('Feature Correlation Heatmap')
plt.show()
 
# Splitting the dataset into training and testing sets
X = df.drop('Diagnosis', axis=1)
y = df['Diagnosis']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
 
# Standardizing the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
 
# Initialize and train the Random Forest classifier
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train_scaled, y_train)
 
# Predictions
y_pred_rf = rf_model.predict(X_test_scaled)

In [None]:
# Evaluating the model
accuracy_rf = accuracy_score(y_test, y_pred_rf)
conf_matrix_rf = confusion_matrix(y_test, y_pred_rf)
print(f'Accuracy of Random Forest: {accuracy_rf}')
print('Confusion Matrix of Random Forest:')
print(conf_matrix_rf)