<h1>Step 1: Implementing KNN</h1>

In [3]:
import numpy as np
import pandas as pd
import math

# Toy dataset: one age and one income per record. The final record has no
# recorded income (None), which pandas stores as NaN.
data = {
    'age': [21, 20, 22, 22, 23, 21, 25, 30, 31, 22],
    'income': [60, 55, 60, 61, 65, 62, 65, 70, 68, None],
}

df = pd.DataFrame(data)

# Age whose income is to be predicted
X = 22

# Euclidean distance between two one-dimensional values
def euclidean_distance(x1, x2):
    """Return the distance between ``x1`` and ``x2``.

    With a single feature the Euclidean distance reduces to the absolute
    value of the difference.
    """
    difference = x1 - x2
    return abs(difference)

# Apply the KNN algorithm with k=3
def knn_predict(df, X, k=3, distance_fn=None):
    """Predict the income for age ``X`` as the mean income of its k nearest rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an 'age' and an 'income' column. Rows with a missing
        income are never used as neighbours. The frame itself is not modified.
    X : number
        Age for which the income is predicted.
    k : int, default 3
        Number of neighbours to average; must be at least 1.
    distance_fn : callable, optional
        Called as ``distance_fn(age, X)`` for every candidate row. Defaults to
        ``euclidean_distance``.

    Returns
    -------
    predicted_income : float
        Mean income of the selected neighbours (NaN when no row has an income).
    neighbors : pandas.DataFrame
        The selected rows, nearest first, with an added 'distance' column.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be at least 1")
    if distance_fn is None:
        distance_fn = euclidean_distance

    # Only rows with a known income can contribute; .copy() keeps the
    # 'distance' helper column out of the caller's DataFrame
    candidates = df[df['income'].notna()].copy()
    candidates['distance'] = candidates['age'].apply(lambda age: distance_fn(age, X))

    # mergesort is stable: rows at equal distance keep their original order,
    # so the k-th neighbour is chosen reproducibly
    neighbors = candidates.sort_values(by='distance', kind='mergesort').head(k)

    # Predict income as the mean of the nearest neighbors' income
    predicted_income = neighbors['income'].mean()

    return predicted_income, neighbors

# Run KNN for the query age and report the prediction with its neighbours
predicted_income_knn, neighbors_knn = knn_predict(df, X)

print("Predicted Income (KNN):", predicted_income_knn)
print("Neighbors Used in Prediction (KNN):", neighbors_knn, sep="\n")


Predicted Income (KNN): 60.333333333333336
Neighbors Used in Prediction (KNN):
   age  income  distance
2   22    60.0         0
3   22    61.0         0
0   21    60.0         1


<h1>Step 2: Implementing Linear Regression</h1>

In [6]:
from sklearn.linear_model import LinearRegression

# Prepare data for Linear Regression. Only the model's own columns are checked
# for missing values: a plain dropna() would also react to NaN in any extra
# column present on df (e.g. the prediction columns written in Step 3) and
# would then discard almost every training row when this cell is re-run.
df_clean = df.dropna(subset=['age', 'income'])

# scikit-learn expects a 2-D feature matrix (n_samples, n_features) and a 1-D target
X_train = df_clean[['age']].to_numpy()
y_train = df_clean['income'].to_numpy()

# Fit the linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Predict the income for the query age X (one sample with one feature)
predicted_income_lr = model.predict(np.array([[X]]))[0]

print("Predicted Income (Linear Regression):", predicted_income_lr)


Predicted Income (Linear Regression): 60.97068965517241


<h1>Step 3: Generating Excel Spreadsheet</h1>

In [9]:
# Attach both predictions to every row whose age equals the query age;
# all other rows get NaN in the new columns
query_rows = df['age'] == X
for column, prediction in (('predicted_income_knn', predicted_income_knn),
                           ('predicted_income_lr', predicted_income_lr)):
    df.loc[query_rows, column] = prediction

# Write the augmented table to an Excel workbook (row index omitted)
output_filename = 'income_prediction.xlsx'
df.to_excel(output_filename, index=False)

output_filename


'income_prediction.xlsx'