In [17]:
import pandas as pd

# Load the training features, their labels, and the unlabeled test rows
# (the files are read in exactly this order).
train_X, train_Y, test_X = (
    pd.read_csv(csv_name)
    for csv_name in ("trainNaive.csv", "trainNaiveLabels.csv", "testNaive.csv")
)


In [19]:
def compute_probabilities(train_X, train_Y):
    """Estimate Naive Bayes parameters from categorical training data.

    Parameters
    ----------
    train_X : pandas.DataFrame
        One row per training sample, one column per feature.
    train_Y : pandas.DataFrame
        Class labels. Its values are flattened and matched to the rows of
        ``train_X`` by position.

    Returns
    -------
    prior_probs : dict
        Maps each class label to its relative frequency P(y); classes are in
        sorted order.
    conditional_probs : dict
        ``conditional_probs[feature][cls]`` is a pandas Series indexed by the
        feature values observed together with ``cls``, holding
        P(x_i = value | y = cls). Values never observed with a class are simply
        absent from that Series; no smoothing is applied here.

    Raises
    ------
    ValueError
        If the number of labels differs from the number of rows in ``train_X``.
    """
    labels = train_Y.values.flatten()
    if len(labels) != len(train_X):
        raise ValueError(
            f"train_Y has {len(labels)} labels but train_X has {len(train_X)} rows"
        )

    # Compute prior probabilities P(y).
    # Counting is done with pandas so the function only relies on the `pd`
    # import the notebook already has (no separate numpy import is required).
    class_counts = pd.Series(labels).value_counts(dropna=False).sort_index()
    prior_probs = (class_counts / len(labels)).to_dict()

    # Compute conditional probabilities P(xi|y) for each feature xi and class y
    conditional_probs = {}
    for feature in train_X.columns:
        per_class = {}
        for cls in prior_probs:
            # Positional boolean mask: rows whose label equals this class.
            subset = train_X.loc[labels == cls, feature]
            per_class[cls] = subset.value_counts() / len(subset)
        conditional_probs[feature] = per_class

    return prior_probs, conditional_probs

def predict(test_X, prior_probs, conditional_probs):
    """Classify each row of ``test_X`` using fitted Naive Bayes parameters.

    For every class the score starts at P(y) and is multiplied by
    P(x_i = value | y) for each feature value in the row. The class with the
    highest score is predicted; on a tie the class that comes first in
    ``prior_probs`` wins.

    Parameters
    ----------
    test_X : pandas.DataFrame
        Rows to classify, one column per feature.
    prior_probs : dict
        Class label -> prior probability.
    conditional_probs : dict
        ``conditional_probs[feature][cls]`` maps a feature value to its
        conditional probability (a pandas Series or a dict).

    Returns
    -------
    list
        One predicted class label per row of ``test_X``, in row order.

    Notes
    -----
    The function depends only on its arguments. The number of distinct values
    per feature used by the unseen-value fallback is taken from
    ``conditional_probs`` itself, not from any global training frame. Columns
    of ``test_X`` that have no entry in ``conditional_probs`` are ignored.
    """
    # Distinct values recorded for each feature across all classes.
    distinct_values = {
        feature: len(set().union(*(dist.keys() for dist in per_class.values())))
        for feature, per_class in conditional_probs.items()
    }

    predictions = []
    for _, row in test_X.iterrows():
        max_prob = -1
        predicted_class = None
        for cls, prior_prob in prior_probs.items():
            prob = prior_prob
            for feature, value in row.items():
                if feature not in conditional_probs:
                    # The model knows nothing about this column; leave the
                    # score unchanged instead of failing on the lookup.
                    continue
                dist = conditional_probs[feature].get(cls)
                if dist is not None and value in dist:
                    prob *= dist[value]
                else:
                    # Value never seen with this class: use a fixed fallback of
                    # 1 / (k + 1), k = number of distinct known values of the
                    # feature. This is a simple stand-in for smoothing, not
                    # count-based Laplace smoothing.
                    prob *= 1 / (distinct_values[feature] + 1)
            if prob > max_prob:
                max_prob = prob
                predicted_class = cls
        predictions.append(predicted_class)
    return predictions

# Fit on the training data, then classify the test rows.
prior_probs, conditional_probs = compute_probabilities(train_X, train_Y)
predictions = predict(test_X, prior_probs, conditional_probs)

# Report one line per test instance, numbered from 1.
print("Predictions for test data:")
for position, pred in enumerate(predictions, start=1):
    print(f"Instance {position}: {pred}")


Predictions for test data:
Instance 1: yes
Instance 2: yes
Instance 3: no
Instance 4: yes
Instance 5: no


In [18]:
import pandas as pd

# Read the training features, the training labels, and the test rows again.
# Re-reading gives this cell fresh frames; the recoding step below overwrites
# their columns in place.
train_X, train_Y, test_X = map(
    pd.read_csv,
    ("trainNaive.csv", "trainNaiveLabels.csv", "testNaive.csv"),
)

# Step 1: Data Preprocessing
# Integer codes for each categorical level of Outlook, Temperature, Humidity, and Windy
outlook_map = {'sunny': 0, 'overcast': 1, 'rainy': 2}
temperature_map = {'hot': 0, 'mild': 1, 'cool': 2}
humidity_map = {'high': 0, 'normal': 1}
windy_map = {False: 0, True: 1}

# Which code table recodes which column
column_codes = {
    'Outlook': outlook_map,
    'Temperature': temperature_map,
    'Humidity': humidity_map,
    'Windy': windy_map,
}

# Recode the training frame first, then the test frame, column by column.
# The columns are overwritten in place; a value missing from its code table
# becomes NaN (Series.map behaviour).
for frame in (train_X, test_X):
    for column, codes in column_codes.items():
        frame[column] = frame[column].map(codes)

# Step 2: Training
# Class priors: relative frequency of each label in the training labels
class_counts = train_Y['Play'].value_counts(normalize=True)

# feature_counts[feature][label] is the relative frequency of each value of
# `feature` among the training rows carrying `label`.
feature_counts = {
    feature: {
        label: train_X.loc[train_Y['Play'] == label, feature].value_counts(normalize=True)
        for label in train_Y['Play'].unique()
    }
    for feature in train_X.columns
}

# Step 3: Classification
# Score every label for every test row as prior * product of per-feature
# frequencies, then keep the best-scoring label.
candidate_labels = train_Y['Play'].unique()
predictions = []
for i in range(len(test_X)):
    row = test_X.iloc[i]
    probabilities = {}
    for label in candidate_labels:
        score = class_counts[label]
        for feature in test_X.columns:
            # A value never observed with this label contributes a factor of 0,
            # so the whole score for that label becomes 0 (no smoothing).
            score *= feature_counts[feature][label].get(row[feature], 0)
        probabilities[label] = score
    # On a tie, max() keeps the first label in candidate_labels order.
    predicted_label = max(probabilities, key=probabilities.get)
    predictions.append(predicted_label)

# Print predictions
print(predictions)


['yes', 'yes', 'yes', 'yes', 'no']
