In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor

# Load existing dataset
data = pd.read_csv('hr_salary_data.csv')

# Prepare data: predictor columns and the salary target
feature_cols = ['experience', 'written_score', 'interview_score']
X = data[feature_cols]
y = data['salary']

# Scale features (KNN is distance-based, so columns should be on a comparable scale)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Test candidates A and B, listed in the same column order as the training features.
# They are built as a DataFrame because the scaler was fitted on named columns;
# handing it a bare array makes scikit-learn warn about missing feature names
# and silently relies on positional column order.
test_data = pd.DataFrame([[5, 8, 10], [8, 7, 6]], columns=feature_cols)

# The candidates do not depend on K, so scale them once instead of on every iteration.
test_scaled = scaler.transform(test_data)

# Test with different K values
k_values = [1, 3, 5, 7, 9]
for k in k_values:
    # Fit on the full scaled dataset; each prediction is the mean salary of the k nearest rows
    knn = KNeighborsRegressor(n_neighbors=k)
    knn.fit(X_scaled, y)

    predictions = knn.predict(test_scaled)

    print(f"K = {k}")
    print(f"Candidate A salary: ${predictions[0]}")
    print(f"Candidate B salary: ${predictions[1]}")

K = 1
Candidate A salary: $110000.0
Candidate B salary: $112000.0
K = 3
Candidate A salary: $126666.66666666667
Candidate B salary: $117333.33333333333
K = 5
Candidate A salary: $123000.0
Candidate B salary: $106200.0
K = 7
Candidate A salary: $116571.42857142857
Candidate B salary: $111571.42857142857
K = 9
Candidate A salary: $112444.44444444444
Candidate B salary: $112444.44444444444




In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load existing dataset
data = pd.read_csv('hr_selection_data.csv')

# Split data
feature_cols = ['grad_percent', 'experience', 'written_score', 'interview_score']
X = data[feature_cols]
y = data['selection']

# Stratify on the label so the held-out fold keeps the class proportions.
# With a plain random split the test fold can end up without a single positive
# row; precision/recall/F1 are then undefined and collapse to the
# zero_division fallback (0.0) even though accuracy reads 1.0.
# NOTE: stratification needs at least two rows of every class in the dataset.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Train Naive Bayes model
nb = GaussianNB()
nb.fit(X_train, y_train)

# Evaluate on test data (binary metrics, positive class = label 1 by sklearn default)
y_pred = nb.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, zero_division=0)
recall = recall_score(y_test, y_pred, zero_division=0)
f1 = f1_score(y_test, y_pred, zero_division=0)

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

# Predict for given candidates.
# Built as a DataFrame with the training column names so the model, which was
# fitted on named columns, receives matching feature names instead of a bare array.
candidates = pd.DataFrame([[90, 5, 8, 10], [75, 8, 7, 6]], columns=feature_cols)
predictions = nb.predict(candidates)
probabilities = nb.predict_proba(candidates)

# predict_proba columns are ordered like nb.classes_; look up the column of
# label 1 (the same positive label the metrics above use) rather than assuming
# it sits at position 1.
positive_col = list(nb.classes_).index(1)

print(f"Candidate A selection: {predictions[0]}, Prob: {probabilities[0][positive_col]}")
print(f"Candidate B selection: {predictions[1]}, Prob: {probabilities[1][positive_col]}")

# Save metrics as a single-row CSV
metrics_df = pd.DataFrame({
    'accuracy': [accuracy],
    'precision': [precision],
    'recall': [recall],
    'f1_score': [f1]
})
metrics_df.to_csv('classification_metrics.csv', index=False)

Accuracy: 1.0
Precision: 0.0
Recall: 0.0
F1 Score: 0.0
Candidate A selection: 1, Prob: 1.0
Candidate B selection: 1, Prob: 1.0




In [None]:
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the Iris feature matrix and class labels
iris = load_iris()
X, y = iris.data, iris.target


def _entropy_tree_accuracy(X_tr, X_te, y_tr, y_te, max_depth=None):
    """Fit an entropy-criterion decision tree and return its accuracy on the test part.

    Args:
        X_tr, y_tr: training features and labels.
        X_te, y_te: held-out features and labels.
        max_depth: depth limit passed to the tree; None leaves it unbounded.

    Returns:
        Fraction of held-out samples classified correctly.
    """
    tree = DecisionTreeClassifier(criterion='entropy', max_depth=max_depth, random_state=42)
    tree.fit(X_tr, y_tr)
    return accuracy_score(y_te, tree.predict(X_te))


# Experiment 1: vary the fraction of the data used for training
train_sizes = [0.5, 0.6, 0.7, 0.8, 0.9]
for size in train_sizes:
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=size, random_state=42)
    accuracy = _entropy_tree_accuracy(X_train, X_test, y_train, y_test)
    print(f"Train size: {size}, Accuracy: {accuracy}")

# Experiment 2: fix the split at 80% training data and vary the depth limit
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, random_state=42)
depths = [3, 5, 7, 10, None]
for depth in depths:
    accuracy = _entropy_tree_accuracy(X_train, X_test, y_train, y_test, max_depth=depth)
    print(f"Max depth: {depth}, Accuracy: {accuracy}")

Train size: 0.5, Accuracy: 0.92
Train size: 0.6, Accuracy: 0.9833333333333333
Train size: 0.7, Accuracy: 0.9777777777777777
Train size: 0.8, Accuracy: 1.0
Train size: 0.9, Accuracy: 1.0
Max depth: 3, Accuracy: 1.0
Max depth: 5, Accuracy: 1.0
Max depth: 7, Accuracy: 1.0
Max depth: 10, Accuracy: 1.0
Max depth: None, Accuracy: 1.0


In [None]:
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load iris dataset
X, y = load_iris(return_X_y=True)

# Split data (fixed seed so every model is scored on the same held-out rows)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


def _score_row(model_name, y_true, y_hat):
    """Build one comparison-table row for a fitted model's predictions.

    Args:
        model_name: label shown in the 'model' column.
        y_true: ground-truth labels of the evaluation set.
        y_hat: labels predicted by the model for the same rows.

    Returns:
        dict with the model label, accuracy, and macro-averaged
        precision, recall and F1 (macro = unweighted mean over classes).
    """
    return {
        'model': model_name,
        'accuracy': accuracy_score(y_true, y_hat),
        'precision': precision_score(y_true, y_hat, average='macro'),
        'recall': recall_score(y_true, y_hat, average='macro'),
        'f1_score': f1_score(y_true, y_hat, average='macro'),
    }


# Candidate models, listed in the order they should appear in the report:
# KNN with different k, decision trees with different depth limits, then Naive Bayes.
models = [(f'KNN (k={k})', KNeighborsClassifier(n_neighbors=k)) for k in [1, 3, 5]]
models += [
    (f'DT (depth={depth})',
     DecisionTreeClassifier(criterion='entropy', max_depth=depth, random_state=42))
    for depth in [3, 5, None]
]
models.append(('Naive Bayes', GaussianNB()))

# Fit every candidate on the same training split and score it on the same test split
results = []
for model_name, model in models:
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    results.append(_score_row(model_name, y_test, y_pred))

# Create and save results
results_df = pd.DataFrame(results)
results_df.to_csv('model_comparison.csv', index=False)
print(results_df)

             model  accuracy  precision  recall  f1_score
0        KNN (k=1)       1.0        1.0     1.0       1.0
1        KNN (k=3)       1.0        1.0     1.0       1.0
2        KNN (k=5)       1.0        1.0     1.0       1.0
3     DT (depth=3)       1.0        1.0     1.0       1.0
4     DT (depth=5)       1.0        1.0     1.0       1.0
5  DT (depth=None)       1.0        1.0     1.0       1.0
6      Naive Bayes       1.0        1.0     1.0       1.0
