**Problem 1**

In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
import seaborn as sns

import matplotlib.pyplot as plt  # gives every confusion matrix its own figure

# Load the dataset
df = pd.read_csv('Breast_Cancer_Dataset.csv')

# Remove null rows and columns.
# Entirely empty columns are dropped first: if the CSV carries one
# (NOTE(review): confirm whether this file does), a plain row-wise dropna()
# would discard every row and leave nothing to train on.
df = df.dropna(axis=1, how='all').dropna()

# Encode the 'diagnosis' column as integers
le = LabelEncoder()
df['diagnosis'] = le.fit_transform(df['diagnosis'])

# Separate target variable and feature vectors
X = df.drop('diagnosis', axis=1)
y = df['diagnosis']

# Split the dataset into training set (70%) and test set (30%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Declare the algorithms in a list
algorithms = [LogisticRegression(), GaussianNB(), DecisionTreeClassifier(), KNeighborsClassifier(), SVC(), RandomForestClassifier()]

# Iterate through algorithms to build models and calculate accuracies
for algorithm in algorithms:
    model = algorithm.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(f"Accuracy of {algorithm.__class__.__name__}: {model.score(X_test, y_test)}")

    # Calculate and plot the confusion matrix.
    # A fresh figure per model keeps the heatmaps from being drawn on top of
    # each other; fmt='d' prints the integer counts as-is instead of the
    # default '.2g', which switches to scientific notation from 100 upwards.
    cm = confusion_matrix(y_test, y_pred)
    fig, ax = plt.subplots()
    sns.heatmap(cm, annot=True, fmt='d', ax=ax)
    ax.set(
        title=f"Confusion matrix: {algorithm.__class__.__name__}",
        xlabel='Predicted label',
        ylabel='True label',
    )
    plt.show()


**Problem 2**

In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score, f1_score, classification_report
import matplotlib.pyplot as plt

from sklearn.metrics import roc_curve  # supplies the FPR/TPR points for the ROC plot

# Load the dataset
df = pd.read_csv('/content/Breast_Cancer_Dataset.csv')

# Remove null rows and columns.
# Entirely empty columns are dropped first: if the CSV carries one
# (NOTE(review): confirm whether this file does), a plain row-wise dropna()
# would discard every row and leave nothing to train on.
df = df.dropna(axis=1, how='all').dropna()

# Encode the 'diagnosis' column as integers
le = LabelEncoder()
df['diagnosis'] = le.fit_transform(df['diagnosis'])

# Separate target variable and feature vectors
X = df.drop('diagnosis', axis=1)
y = df['diagnosis']

# Split the dataset into training set (70%) and test set (30%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Declare the algorithms in a list
algorithms = [LogisticRegression(), GaussianNB(), DecisionTreeClassifier(), KNeighborsClassifier(), SVC(), RandomForestClassifier()]

# Iterate through algorithms to build models and calculate accuracies
for algorithm in algorithms:
    model = algorithm.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(f"Accuracy of {algorithm.__class__.__name__}: {model.score(X_test, y_test)}")

    # A ROC curve needs a continuous score per sample, not the hard 0/1
    # prediction. Use the probability of the class encoded as 1 when the model
    # offers it; SVC() with default settings does not, so fall back to its
    # signed distance from the decision boundary.
    if hasattr(model, 'predict_proba'):
        y_score = model.predict_proba(X_test)[:, 1]
    else:
        y_score = model.decision_function(X_test)

    # Calculate ROC_AUC score based on FPR and TPR
    fpr, tpr, _ = roc_curve(y_test, y_score)
    roc_auc = roc_auc_score(y_test, y_score)
    print(f"ROC_AUC score: {roc_auc}")

    # Calculate F1 Score
    f1 = f1_score(y_test, y_pred)
    print(f"F1 Score: {f1}")

    # Print Precision, Recall, and F1 score using classification_report() function
    print(classification_report(y_test, y_pred))

    # Plot ROC Curve using Matplotlib library (dashed diagonal = random guessing)
    plt.figure()
    plt.plot([0, 1], [0, 1], 'k--')
    plt.plot(fpr, tpr, label=f"{algorithm.__class__.__name__} (AUC = {roc_auc:.3f})")
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve')
    plt.legend(loc='lower right')
    plt.show()


**Problem 3**

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

# Read the voice dataset from disk
df = pd.read_csv('voice.csv')

# 1
# Expose the 'label' column under the name 'Gender_Identified'
df = df.rename(columns={'label': 'Gender_Identified'})
print(df)

# Replace the target column's labels with integer codes
le = LabelEncoder()
target_codes = le.fit_transform(df['Gender_Identified'])
df['Gender_Identified'] = target_codes
print(df)

# Target vector, and feature matrix made of every remaining column
y = df['Gender_Identified']
X = df.drop(columns='Gender_Identified')

# Estimator whose hyper-parameters are tuned below
model = RandomForestClassifier()

# Candidate values: 5 forest sizes x 5 depth limits, with the seed pinned to 42
parameters = dict(
    n_estimators=list(range(100, 501, 100)),
    max_depth=list(range(2, 11, 2)),
    random_state=[42],
)

# Exhaustive cross-validated search over the grid, fitted on the full dataset
grid_search = GridSearchCV(estimator=model, param_grid=parameters)
grid_search.fit(X, y)

# Report the winning combination and the estimator refitted with it
print(f'Best Parameters: {grid_search.best_params_}')
print(f'Best Estimator: {grid_search.best_estimator_}')


**Problem 4**

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier

df = pd.read_csv('seeds.csv')

#1
# Show the distinct class labels, then separate the target from the features
print(df['Type'].unique())
y = df['Type']
X = df.drop(columns='Type')
print(df)
print(df['Type'])

# Hold out 30% of the rows for evaluation; the seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Fit a decision tree and report its accuracy on the held-out rows
dtc = DecisionTreeClassifier()
dtc.fit(X_train, y_train)
print('Decision Tree Classifier accuracy score:', dtc.score(X_test, y_test))

#3
# Fit a Gaussian naive Bayes model on the same training rows
gnb = GaussianNB()
gnb.fit(X_train, y_train)

In [None]:
#4
from sklearn.ensemble import AdaBoostClassifier
import matplotlib.pyplot as plt

# Boost each baseline learner with AdaBoost (100 rounds).
# The base learner is passed positionally because its keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was later
# removed; the positional form works on both sides of that change.
dtc_adaboost = AdaBoostClassifier(DecisionTreeClassifier(), n_estimators=100)
dtc_adaboost.fit(X_train, y_train)
gnb_accuracy = gnb.score(X_test, y_test)
dtc_adaboost_accuracy = dtc_adaboost.score(X_test, y_test)
print('GaussianNB model accuracy score:', gnb_accuracy)
print('AdaBoost Classifier (Decision Tree Classifier) accuracy score:', dtc_adaboost_accuracy)
gnb_adaboost = AdaBoostClassifier(GaussianNB(), n_estimators=100)
gnb_adaboost.fit(X_train, y_train)
gnb_adaboost_accuracy = gnb_adaboost.score(X_test, y_test)
print('AdaBoost Classifier (GaussianNB model) accuracy score:', gnb_adaboost_accuracy)

#5
# Plot the accuracies measured in this run rather than numbers typed in by
# hand, so the chart cannot drift out of sync with the models above.
# `dtc` and `gnb` are the baseline models fitted in the previous cell.
accuracy_by_model = {
    'Decision Tree Classifier': dtc.score(X_test, y_test),
    'GaussianNB model': gnb_accuracy,
    'AdaBoost Classifier (Decision Tree Classifier)': dtc_adaboost_accuracy,
    'AdaBoost Classifier (GaussianNB model)': gnb_adaboost_accuracy,
}
plt.figure()
plt.barh(list(accuracy_by_model.keys()), list(accuracy_by_model.values()))
plt.xlabel('Accuracy Score')
plt.ylabel('Model')
plt.title('Accuracy Scores of Original and AdaBoost Classifiers')
plt.show()
