## Q-1 PCA

A. Use the PCA algorithm for dimensionality reduction. You have a dataset that includes 
measurements of different variables for wine (alcohol, ash, magnesium, and so on). 
Apply PCA to transform this data so that most of the variation in the measurements 
is captured by a small number of principal components, making it 
easier to distinguish between red and white wine by inspecting these principal 
components. Dataset Link: https://media.geeksforgeeks.org/wp-content/uploads/Wine.csv

In [None]:
import pandas as pd 
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

df = pd.read_csv('Wine.csv')
print(df.head())

df.info()

print(df.isnull().sum())

# Features: every measurement column. Target: the wine class label.
X = df.drop('Customer_Segment', axis=1)
y = df['Customer_Segment']

# Standardize features by removing the mean and scaling to unit variance.
# PCA ranks directions by variance, so without this step the columns with the
# largest numeric range dominate the principal components.
sc = StandardScaler()
X_scaled = sc.fit_transform(X)

# Fit PCA with every component first, only to see how variance accumulates.
pca = PCA()
X_pca = pca.fit_transform(X_scaled)

explained_variance_ratio = pca.explained_variance_ratio_
plt.plot(
    range(1, len(explained_variance_ratio) + 1),
    explained_variance_ratio.cumsum(),
    marker='o',
    linestyle='--',
)
plt.xlabel('Number of Principal Components')
plt.ylabel('Cumulative Explained Variance')
plt.title('Explained Variance Ratio')
plt.show()


# Choose the number of principal components to reduce the data to.
# Two components are enough for the 2-D scatter plot below; raise this value
# (guided by the cumulative-variance curve) if more variance must be kept.
n_components = 2
pca = PCA(n_components=n_components)
X_pca = pca.fit_transform(X_scaled)
print("Original shape:", X.shape, "-> reduced shape:", X_pca.shape)

# Boolean masks select rows by position, so they stay correct even when the
# DataFrame index is not the default 0..n-1 range.
# NOTE(review): only segments 1 and 2 are plotted; if Customer_Segment holds
# further values those rows are left out of the figure — confirm intent.
red_mask = (y == 1).to_numpy()
white_mask = (y == 2).to_numpy()

plt.scatter(X_pca[red_mask, 0], X_pca[red_mask, 1], c='red', label='Red Wine')
plt.scatter(X_pca[white_mask, 0], X_pca[white_mask, 1], c='blue', label='White Wine')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend()
plt.title('PCA: Red Wine vs. White Wine')
plt.show()

## Q-2 Uber Price

Predict the price of an Uber ride from a given pickup point to the agreed drop-off 
location. Perform the following tasks: 1. Pre-process the dataset.
2. Identify outliers. 3. Check the correlation. 4. Implement linear regression, 
ridge, and lasso regression models. 5. Evaluate the models and compare their respective 
scores, such as R2 and RMSE. Dataset link: 
https://www.kaggle.com/datasets/yasserh/uber-fares-dataset

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import r2_score, mean_squared_error

data = pd.read_csv('uber.csv')
print(data.head())

# --- 1. Pre-processing -------------------------------------------------------
# Derive simple time features, then drop the identifier and raw timestamp columns.
data['pickup_datetime'] = pd.to_datetime(data['pickup_datetime'])
data['hour'] = data['pickup_datetime'].dt.hour
data['day_of_week'] = data['pickup_datetime'].dt.dayofweek

data = data.drop(columns=['Unnamed: 0', 'key', 'pickup_datetime'])

print(data.isnull().sum())

# Assign the filled column back instead of calling fillna(inplace=True) on a
# column selection: the in-place form on a selected column is not guaranteed
# to modify `data` itself in recent pandas versions.
data['dropoff_longitude'] = data['dropoff_longitude'].fillna(data['dropoff_longitude'].mean())
data['dropoff_latitude'] = data['dropoff_latitude'].fillna(data['dropoff_latitude'].mean())

print(data.isnull().sum())

# --- 2. Outliers -------------------------------------------------------------
plt.figure(figsize=(8, 6))
sns.boxplot(data=data, x='fare_amount')
plt.title('Box Plot of Fare Amount')
plt.show()

# Remove fares outside 1.5 * IQR. This happens BEFORE the train/test split so
# the models are actually fitted and scored on the cleaned data.
Q1 = data['fare_amount'].quantile(0.25)
Q3 = data['fare_amount'].quantile(0.75)
IQR = Q3 - Q1
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR
data = data[(data['fare_amount'] >= lower_bound) & (data['fare_amount'] <= upper_bound)]

plt.figure(figsize=(8, 6))
sns.boxplot(data=data, x='fare_amount')
plt.title('Box Plot of Fare Amount')
plt.show()

# --- 3. Correlation ----------------------------------------------------------
correlation_matrix = data.corr()
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True)
plt.title('Correlation Matrix Heatmap')
plt.show()

# --- 4. Train/test split and scaling -----------------------------------------
X = data.drop(columns=['fare_amount'])
y = data['fare_amount']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training split only so no test statistics leak in.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# --- 5. Models ---------------------------------------------------------------
# Linear Regression
lr = LinearRegression()
lr.fit(X_train, y_train)
y_pred_lr = lr.predict(X_test)
r2_lr = r2_score(y_test, y_pred_lr)
rmse_lr = np.sqrt(mean_squared_error(y_test, y_pred_lr))

# Ridge Regression
ridge = Ridge(alpha=1.0)  # You can adjust the alpha parameter
ridge.fit(X_train, y_train)
y_pred_ridge = ridge.predict(X_test)
r2_ridge = r2_score(y_test, y_pred_ridge)
rmse_ridge = np.sqrt(mean_squared_error(y_test, y_pred_ridge))

# Lasso Regression
lasso = Lasso(alpha=1.0)  # You can adjust the alpha parameter
lasso.fit(X_train, y_train)
y_pred_lasso = lasso.predict(X_test)
r2_lasso = r2_score(y_test, y_pred_lasso)
rmse_lasso = np.sqrt(mean_squared_error(y_test, y_pred_lasso))

# Print results
print("Linear Regression - R2:", r2_lr, "RMSE:", rmse_lr)
print("Ridge Regression - R2:", r2_ridge, "RMSE:", rmse_ridge)
print("Lasso Regression - R2:", r2_lasso, "RMSE:", rmse_lasso)


# Q3-KNN On Social N/W Dataset

1) Implement Support Vector Machines (SVM) to classify images of
handwritten digits into their respective numerical classes (0 to 9).
2) 
Implement the K-Nearest Neighbours algorithm on the Social Network Ads dataset.
Compute the confusion matrix, accuracy, error rate, precision, and recall on the given
dataset.
Dataset link: https://www.kaggle.com/datasets/rakeshrau/social-network-ads

In [None]:
#KNN Code

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

data = pd.read_csv("Social_Network_Ads.csv")
print(data.head())

print(data.isnull().sum())

# Two numeric predictors; the target is whether the user purchased.
X = data[['Age', 'EstimatedSalary']]
y = data['Purchased']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# KNN is distance based, so both features are put on the same scale.
# The scaler is fitted on the training split only.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

y_pred = knn.predict(X_test)

# Calculate the confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", conf_matrix)

# Calculate and print accuracy and its complement, the error rate
accuracy = accuracy_score(y_test, y_pred)
error_rate = 1 - accuracy
print("Accuracy:", accuracy)
print("Error Rate:", error_rate)

# Precision and recall for the positive class, read off the confusion matrix.
# NOTE(review): unpacking four values assumes a binary 0/1 'Purchased' target
# with both classes present in the test split — confirm against the data.
tn, fp, fn, tp = conf_matrix.ravel()
precision = tp / (tp + fp) if (tp + fp) else 0.0
recall = tp / (tp + fn) if (tp + fn) else 0.0
print("Precision:", precision)
print("Recall:", recall)

# Use classification_report to get per-class precision, recall, and F1-score
print("\nClassification Report:\n", classification_report(y_test, y_pred))




# SVM Code


from sklearn.datasets import fetch_openml
mnist = fetch_openml('mnist_784', version=1)
X, y = mnist.data, mnist.target.astype(int)

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize pixel columns; the scaler is fitted on the training split only.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# NOTE: fitting a linear-kernel SVC on the full training split may take a long
# time; train on a subset of the rows if a quicker run is needed.
from sklearn.svm import SVC
svm_classifier = SVC(kernel='linear')
svm_classifier.fit(X_train, y_train)


from sklearn.metrics import accuracy_score
y_pred = svm_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


import matplotlib.pyplot as plt
import numpy as np
n_samples_to_visualize = 10
# Sample without replacement so the same test image is never shown twice.
random_indices = np.random.choice(len(X_test), n_samples_to_visualize, replace=False)
predicted_labels = svm_classifier.predict(X_test[random_indices])
true_labels = np.asarray(y_test)[random_indices]
# Undo the standardization so the plot shows the digit's real pixel
# intensities rather than per-column z-scores.
original_images = scaler.inverse_transform(X_test[random_indices])
plt.figure(figsize=(12, 6))
for i, (image, predicted, actual) in enumerate(zip(original_images, predicted_labels, true_labels)):
    plt.subplot(2, 5, i + 1)
    plt.imshow(image.reshape(28, 28), cmap='gray')
    # Show what the classifier said next to the ground truth.
    plt.title(f"Pred: {predicted} / True: {actual}")
    plt.axis('off')
plt.tight_layout()
plt.show()



from sklearn.manifold import Isomap
from sklearn.datasets import load_digits # Import load_digits to load the dataset
# Load the digits dataset
digits = load_digits()
# Project the 64-dimensional digit vectors down to 2 dimensions for plotting
iso = Isomap(n_components=2)
projection = iso.fit_transform(digits.data) # digits.data - 64 dimensions to 2
import matplotlib.pyplot as plt
plt.scatter(projection[:, 0], projection[:, 1], c=digits.target, cmap="viridis")
plt.colorbar(ticks=range(10), label='Digits Value')
# The colour limits must span all ten classes (0..9); an upper limit of 5.5
# would paint digits 6-9 in the same colour and cut off the colorbar ticks.
plt.clim(-0.5, 9.5)
plt.show()



def view_digit(index):
    """Display the digit image stored at `index`, titled with its true label.

    Reads the image and label from the module-level `digits` dataset.
    """
    image = digits.images[index]
    label = digits.target[index]
    plt.imshow(image, cmap=plt.cm.gray_r)
    plt.title('Orignal it is: ' + str(label))
    plt.show()


# Show one sample as a quick visual check of the dataset
view_digit(4)



from sklearn.metrics import confusion_matrix
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
# Relies on names defined earlier in the notebook: the fitted `svm_classifier`
# and the held-out `X_test` / `y_test`.
predictions = svm_classifier.predict(X_test)  # predicted class per test row

# Tabulate true labels (rows) against predicted labels (columns)
cm = confusion_matrix(y_test, predictions)
conf_matrix = pd.DataFrame(data=cm)

# Draw the counts as an annotated heatmap with integer cell labels
plt.figure(figsize=(8, 5))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap="YlGnBu")
plt.show()

# Q-4 K-Mean

A. Implement K-Means clustering on the Iris.csv dataset. Determine the number of
clusters using the elbow method. Dataset Link:
https://www.kaggle.com/datasets/uciml/iris

In [None]:
import pandas as pd
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

data = pd.read_csv('iris.csv')
print(data.head())

data.info()

# Remove the target column. Also drop the row-identifier column when the CSV
# has one: an 'Id' column is not a measurement and, once scaled, would be
# clustered on like any real feature.
X = data.drop('Species', axis=1).drop(columns=['Id'], errors='ignore')
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Step 5: Use the elbow method to determine the optimal number of clusters
inertia = []
K = range(1, 11)  # Try from 1 to 10 clusters
for k in K:
    # n_init is set explicitly so results do not depend on the library default.
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
    kmeans.fit(X_scaled)
    inertia.append(kmeans.inertia_)

# Plot the elbow curve
plt.figure(figsize=(10, 6))
plt.plot(K, inertia, 'bo-')
plt.xlabel('Number of Clusters (k)')
plt.ylabel('Inertia')
plt.title('Elbow Method for Optimal k')
plt.show()

# Step 6: Train K-Means with the optimal number of clusters
# Assuming the elbow method suggests 3 clusters for the Iris dataset
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
kmeans.fit(X_scaled)

# Step 7: Add the cluster labels to the dataset
data['Cluster'] = kmeans.labels_
print(data.head())


# Optional: visualize the clusters on the first two (scaled) feature columns
plt.scatter(X_scaled[:, 0], X_scaled[:, 1], c=kmeans.labels_, cmap='viridis', marker='o')
plt.xlabel(f'{X.columns[0]} (scaled)')
plt.ylabel(f'{X.columns[1]} (scaled)')
plt.title('K-Means Clustering (3 clusters)')
plt.show()

# Q-5 Random Forest Classifier

A. Implement Random Forest Classifier model to predict the safety of the car. Dataset 
link: https://www.kaggle.com/datasets/elikplim/car-evaluation-data-set

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
import category_encoders as ce
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

col_names = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'class']

# NOTE(review): the column names are assigned by hand, so the CSV is assumed
# to have no header row — confirm against the file. With the default header
# handling pandas would use the first record as column labels and that record
# would be lost once the names are overwritten.
df = pd.read_csv('car_evaluation.csv', header=None, names=col_names)
print(df.head())
print(df.describe())

# Every column is categorical; list the frequency of each level.
for col in col_names:
    print(df[col].value_counts())

print(df.isnull().sum())

x = df.drop(['class'], axis=1)
y = df['class']

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)
print(x_train.shape, x_test.shape)

# Encode the categorical levels as integers. The encoder is fitted on the
# training split only and then reused for the test split.
encoder = ce.OrdinalEncoder(cols=['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety'])
x_train = encoder.fit_transform(x_train)
x_test = encoder.transform(x_test)

print(x_train.head())

# Small forest as a baseline. n_estimators is spelled out because the library
# default is itself 100 trees in current scikit-learn, which would make this
# model identical to rfc_100 below.
rfc = RandomForestClassifier(n_estimators=10, random_state=0)
rfc.fit(x_train, y_train)

y_pred = rfc.predict(x_test)
print("Accuracy with 10 trees:", accuracy_score(y_test, y_pred))

# Larger forest for comparison
rfc_100 = RandomForestClassifier(n_estimators=100, random_state=0)
rfc_100.fit(x_train, y_train)

y_pred_100 = rfc_100.predict(x_test)
print("Accuracy with 100 trees:", accuracy_score(y_test, y_pred_100))

## Q-6 Reinforcement Learning

Implement Reinforcement Learning using an example of a maze environment that the 
agent needs to explore.

In [None]:
import numpy as np

def _read_positive_int(prompt):
    """Prompt until the user enters an integer greater than zero."""
    while True:
        try:
            value = int(input(prompt))
        except ValueError:
            print("Error: please enter a whole number.")
            continue
        if value > 0:
            return value
        print("Error: the value must be at least 1.")


def create_maze():
    """Interactively read a maze from standard input.

    The user first enters the number of rows and columns (both must be
    positive integers), then one line per row of space-separated cells,
    where 0 is free space and 1 is a wall. A line with the wrong number of
    cells, a non-integer token, or a value other than 0/1 is rejected and
    that row is asked for again.

    Returns:
        A NumPy integer array of shape (rows, cols) containing only 0 and 1.
    """
    rows = _read_positive_int("Enter the number of rows: ")
    cols = _read_positive_int("Enter the number of columns: ")
    maze = np.zeros((rows, cols), dtype=int)
    print("Enter the maze layout row by row (0 for free space, 1 for wall):")
    for row in range(rows):
        while True:
            try:
                row_data = list(map(int, input().strip().split()))
                if len(row_data) != cols:
                    raise ValueError(f"Expected {cols} columns, but got {len(row_data)}.")
                # Any other value would silently behave like a wall later on,
                # so reject it here where the user can correct it.
                invalid = [cell for cell in row_data if cell not in (0, 1)]
                if invalid:
                    raise ValueError(f"Cells must be 0 or 1, but got {invalid}")
            except ValueError as e:
                print(f"Error: {e}. Please enter {cols} integers separated by spaces.")
            else:
                maze[row] = row_data
                break
    return maze

# Build the maze grid from interactive user input; the code below reads
# `maze` as a module-level global.
maze = create_maze()

class QLearningAgent:
    """Tabular Q-learning agent with an epsilon-greedy action policy.

    The agent keeps a (num_states x num_actions) table of action values,
    initialised to zero, and updates it one transition at a time.
    """

    def __init__(self, num_states, num_actions, learning_rate=0.1, discount_factor=0.9, exploration_prob=0.2):
        """Store the hyper-parameters and allocate a zero-filled Q-table."""
        self.num_states, self.num_actions = num_states, num_actions
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_prob = exploration_prob
        self.q_table = np.zeros(shape=(num_states, num_actions))

    def choose_action(self, state):
        """Return a random action with probability `exploration_prob`,
        otherwise the action with the highest Q-value for `state`."""
        explore = np.random.rand() < self.exploration_prob
        if explore:
            return np.random.choice(self.num_actions)
        return np.argmax(self.q_table[state])

    def learn(self, state, action, reward, next_state):
        """Move Q(state, action) towards reward + discounted best next value."""
        current_estimate = self.q_table[state, action]
        best_future_value = np.max(self.q_table[next_state])
        td_target = reward + self.discount_factor * best_future_value
        self.q_table[state, action] += self.learning_rate * (td_target - current_estimate)

# Flatten a (row, col) maze cell into a single integer state index
def get_state(row, col, cols):
    """Return the row-major index of cell (row, col) in a grid `cols` wide."""
    return cols * row + col

def get_coordinates(state, cols):
    """Return the (row, col) pair for a row-major `state` in a grid `cols` wide."""
    return state // cols, state % cols

# Problem size: one state per maze cell, four possible moves per state
num_actions = 4
num_states = maze.size

# The agent starts in the top-left cell and has to reach the bottom-right cell
initial_state = get_state(0, 0, maze.shape[1])
goal_state = get_state(maze.shape[0] - 1, maze.shape[1] - 1, maze.shape[1])

# Agent with the default learning rate, discount factor and exploration rate
agent = QLearningAgent(num_states=num_states, num_actions=num_actions)

def train_agent(agent, num_episodes=1000, max_steps=10_000):
    """Train `agent` with Q-learning on the module-level `maze`.

    Every episode starts at `initial_state` and ends when the agent enters
    `goal_state` or after `max_steps` actions, whichever comes first. The
    step limit keeps training from hanging when the goal cannot be reached
    (for example when the goal cell is walled off).

    Args:
        agent: object providing `choose_action(state)` and
            `learn(state, action, reward, next_state)`.
        num_episodes: number of episodes to run.
        max_steps: maximum number of actions per episode.

    Rewards: 1 for entering the goal cell, 0 for any other accepted move.
    A move off the grid leaves the agent in place (and is learned with reward
    0); a move into a wall is ignored without any update.
    """
    n_rows, n_cols = maze.shape
    # (row offset, column offset) per action: 0 = up, 1 = down, 2 = left, 3 = right
    offsets = {0: (-1, 0), 1: (1, 0), 2: (0, -1), 3: (0, 1)}

    for _ in range(num_episodes):
        state = initial_state
        for _ in range(max_steps):
            action = agent.choose_action(state)
            row, col = get_coordinates(state, n_cols)
            # An unknown action code moves nowhere.
            d_row, d_col = offsets.get(int(action), (0, 0))
            next_row, next_col = row + d_row, col + d_col

            # Stepping off the grid keeps the agent where it is.
            if 0 <= next_row < n_rows and 0 <= next_col < n_cols:
                next_state = get_state(next_row, next_col, n_cols)
            else:
                next_state = state

            # Walls block the move entirely: no update, no change of position.
            if maze.flat[next_state] != 0:
                continue

            reached_goal = next_state == goal_state
            reward = 1 if reached_goal else 0
            agent.learn(state, action, reward, next_state)
            state = next_state
            if reached_goal:
                break


train_agent(agent, num_episodes=1000)

def test_agent(agent, max_steps=None):
    """Follow the agent's learned greedy policy from start to goal, printing each step.

    Unlike training, no random exploration is used: each step takes the
    action with the highest Q-value for the current state. The walk stops
    with a message, instead of looping or leaving the grid, when the chosen
    move is blocked by a wall or the maze boundary, or when `max_steps`
    moves have been made without reaching the goal.

    Args:
        agent: trained agent exposing a `q_table` indexed by state.
        max_steps: maximum number of moves; defaults to the number of maze
            cells, which is enough for any path that never revisits a cell.
    """
    n_rows, n_cols = maze.shape
    # (row offset, column offset) per action: 0 = up, 1 = down, 2 = left, 3 = right
    offsets = {0: (-1, 0), 1: (1, 0), 2: (0, -1), 3: (0, 1)}
    if max_steps is None:
        max_steps = maze.size

    state = initial_state
    steps_taken = 0
    while state != goal_state:
        if steps_taken >= max_steps:
            print("Agent did not reach the goal within the step limit.")
            return

        # Exploit only: evaluation should show what was learned, not random moves.
        action = int(agent.q_table[state].argmax())
        row, col = get_coordinates(state, n_cols)
        print(f"Current State: ({row}, {col}), Chosen Action: {action}")

        d_row, d_col = offsets.get(action, (0, 0))
        next_row, next_col = row + d_row, col + d_col
        in_bounds = 0 <= next_row < n_rows and 0 <= next_col < n_cols
        if not in_bounds or maze[next_row, next_col] != 0:
            # The greedy policy is deterministic, so a blocked move would be
            # chosen again forever; stop instead.
            print("Chosen action is blocked by a wall or the maze boundary; stopping.")
            return

        state = get_state(next_row, next_col, n_cols)
        print(f"New State: ({get_coordinates(state, n_cols)})")
        steps_taken += 1
    print("Agent reached the goal!")


test_agent(agent)

o/p:-

Enter the number of rows:  3
Enter the number of columns:  3
Enter the maze layout row by row (0 for free space, 1 for wall):
 0 0 0
 1 1 0
 0 0 0
Current State: (0, 0), Chosen Action: 3
New State: ((0, 1))
Current State: (0, 1), Chosen Action: 3
New State: ((0, 2))
Current State: (0, 2), Chosen Action: 1
New State: ((1, 2))
Current State: (1, 2), Chosen Action: 1
New State: ((2, 2))
Agent reached the goal!