# Multiclass Logistic Regression

In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris

# Load the bundled iris dataset and unpack it into a feature matrix (X)
# and the matching vector of integer class labels (y).
iris = load_iris()
X, y = iris.data, iris.target


In [2]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for evaluation; the fixed random_state makes
# the split identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [3]:
from sklearn.linear_model import LogisticRegression

# One-vs-rest: fit one binary logistic-regression model per class label
# (0, 1, 2). Each model's target is True for samples of that class and
# False for every other sample. `fit` returns the fitted estimator itself,
# so the list holds the three trained models in class order.
classifiers = [
    LogisticRegression().fit(X_train, y_train == class_label)
    for class_label in range(3)
]


In [4]:
# For every binary model, take the probability it assigns to its positive
# class (column 1 of predict_proba) and lay those vectors side by side:
# the result has one row per test sample and one column per classifier.
positive_class_probs = [clf.predict_proba(X_test)[:, 1] for clf in classifiers]
y_pred_ovr = np.column_stack(positive_class_probs)

# The predicted label is the index of the classifier with the highest probability.
y_pred = y_pred_ovr.argmax(axis=1)


In [5]:
from sklearn.metrics import accuracy_score

# Share of test samples whose predicted label equals the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)


Accuracy: 0.9666666666666667


# GridSearchCV

In [6]:
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

# Reload iris and rebuild the 80/20 train/test split with a fixed seed.
# NOTE: this overwrites iris, X_train, X_test, y_train and y_test from the
# earlier cells.
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.2,
    random_state=42,
)

# Candidate neighbourhood sizes to try: the odd values 1, 3, ..., 11.
param_grid = {'n_neighbors': list(range(1, 12, 2))}

# Evaluate every candidate with 5-fold cross-validation on the training
# split only; the test split is not touched by the search.
knn = KNeighborsClassifier()
grid_search = GridSearchCV(estimator=knn, param_grid=param_grid, cv=5)
grid_search.fit(X_train, y_train)

# Report the number of neighbours that scored best in cross-validation.
print("Best K value:", grid_search.best_params_['n_neighbors'])


Best K value: 3


# K-Means Clustering

In [12]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler

# Read the customer table from a CSV file; the path is relative, so it is
# resolved against the kernel's current working directory.
mall_data = pd.read_csv(filepath_or_buffer='Mall_Customers.csv')


In [13]:
# Use only the columns at positions 3 and 4 as clustering features
# (presumably annual income and spending score -- confirm against the CSV header).
feature_positions = [3, 4]
raw_features = mall_data.iloc[:, feature_positions].values

# Standardise each feature so both contribute on the same scale to the
# distance computations. NOTE: X is rebound here and no longer refers to
# the iris feature matrix from the first section.
scaler = StandardScaler()
X = scaler.fit_transform(raw_features)


In [14]:
# Candidate cluster counts: 2 through 10 inclusive.
k_values = list(range(2, 11))

# For each candidate, fit K-Means on the scaled features, assign every
# sample to its nearest centre, and record the silhouette score of that
# partition (higher means better-separated clusters).
silhouette_scores = []
for k in k_values:
    kmeans = KMeans(n_clusters=k, init='k-means++', random_state=42)
    labels = kmeans.fit(X).predict(X)
    silhouette_scores.append(silhouette_score(X, labels))

# Keep the candidate with the highest score; on ties argmax picks the
# first one, i.e. the smallest k.
best_index = int(np.argmax(silhouette_scores))
best_k = k_values[best_index]
print("Best K value:", best_k)


Best K value: 5


In [15]:
# Refit K-Means with the selected number of clusters and keep the cluster
# index assigned to each sample during fitting.
kmeans = KMeans(
    n_clusters=best_k,
    init='k-means++',
    random_state=42,
).fit(X)
labels = kmeans.labels_


In [16]:
# Summarise every cluster: its index, the two coordinates of its centre,
# and the number of samples assigned to it. The centres are taken from the
# fitted model, so they are expressed in the same standardised feature
# space the model was trained on, not in the original units.
#
# Rows are collected in a plain list and the DataFrame is built once:
# DataFrame.append was removed in pandas 2.0, and growing a frame row by
# row copies it on every iteration.
cluster_rows = []
for cluster_id in range(best_k):
    center = kmeans.cluster_centers_[cluster_id]
    cluster_rows.append({
        'Cluster': cluster_id,
        'Center X': center[0],
        'Center Y': center[1],
        # Count of samples whose assigned label is this cluster.
        'Size': int(np.sum(labels == cluster_id)),
    })

# Passing `columns` fixes the column order and keeps the header even if
# there are no rows.
clusters_df = pd.DataFrame(cluster_rows, columns=['Cluster', 'Center X', 'Center Y', 'Size'])

# Save the clusters data to a CSV file
clusters_df.to_csv('Mall_Customers_Clusters.csv', index=False)
