<a href="https://colab.research.google.com/github/Harsh-D-7/Data-Mining/blob/main/Experiment-6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd

# Play Cricket dataset
data = {
    'Outlook':    ['Sunny','Sunny','Overcast','Rain','Rain','Rain','Overcast','Sunny','Sunny','Rain','Sunny','Overcast','Overcast','Rain'],
    'Temperature':['Hot','Hot','Hot','Mild','Cool','Cool','Cool','Mild','Cool','Mild','Mild','Mild','Hot','Mild'],
    'Humidity':   ['High','High','High','High','Normal','Normal','Normal','High','Normal','Normal','Normal','High','Normal','High'],
    'Wind':       ['Weak','Strong','Weak','Weak','Weak','Strong','Strong','Weak','Weak','Weak','Strong','Strong','Weak','Strong'],
    'PlayCricket':['No','No','Yes','Yes','Yes','No','Yes','No','Yes','Yes','Yes','Yes','Yes','No']
}

df = pd.DataFrame(data)
df


Unnamed: 0,Outlook,Temperature,Humidity,Wind,PlayCricket
0,Sunny,Hot,High,Weak,No
1,Sunny,Hot,High,Strong,No
2,Overcast,Hot,High,Weak,Yes
3,Rain,Mild,High,Weak,Yes
4,Rain,Cool,Normal,Weak,Yes
5,Rain,Cool,Normal,Strong,No
6,Overcast,Cool,Normal,Strong,Yes
7,Sunny,Mild,High,Weak,No
8,Sunny,Cool,Normal,Weak,Yes
9,Rain,Mild,Normal,Weak,Yes


In [2]:
%%bash
cat > PlayCricketBayes.java << 'EOF'
import java.util.*;

public class PlayCricketBayes {
    static String[][] data = {
        { "Sunny", "Hot",    "High",   "Weak",   "No" },
        { "Sunny", "Hot",    "High",   "Strong", "No" },
        { "Overcast","Hot",  "High",   "Weak",   "Yes"},
        { "Rain",  "Mild",   "High",   "Weak",   "Yes"},
        { "Rain",  "Cool",   "Normal", "Weak",   "Yes"},
        { "Rain",  "Cool",   "Normal", "Strong", "No" },
        { "Overcast","Cool", "Normal", "Strong", "Yes"},
        { "Sunny", "Mild",   "High",   "Weak",   "No" },
        { "Sunny", "Cool",   "Normal", "Weak",   "Yes"},
        { "Rain",  "Mild",   "Normal", "Weak",   "Yes"},
        { "Sunny", "Mild",   "Normal", "Strong", "Yes"},
        { "Overcast","Mild", "High",   "Strong", "Yes"},
        { "Overcast","Hot",  "Normal", "Weak",   "Yes"},
        { "Rain",  "Mild",   "High",   "Strong", "No" }
    };

    public static void main(String[] args) {
        String[] features = { "Outlook", "Temperature", "Humidity", "Wind" };
        String[] sample = { "Sunny", "Cool", "High", "Strong" };
        Map<String, Integer> classCounts = new HashMap<>();
        List<Map<String, Map<String, Integer>>> featureCounts = new ArrayList<>();
        for(int i=0; i<features.length; i++) {
            featureCounts.add(new HashMap<>());
        }
        for (String[] row : data) {
            String cls = row[4];
            classCounts.put(cls, classCounts.getOrDefault(cls, 0) + 1);
            for (int i = 0; i < 4; i++) {
                featureCounts.get(i)
                    .computeIfAbsent(row[i], k -> new HashMap<>())
                    .merge(cls, 1, Integer::sum);
            }
        }
        int total = data.length;
        System.out.println("Java Bayes Class Probabilities:");
        for (String cls : classCounts.keySet()) {
            double prior = (double)classCounts.get(cls) / total;
            double posterior = prior;
            for (int i = 0; i < sample.length; i++) {
                Map<String, Integer> counts = featureCounts.get(i).get(sample[i]);
                int count = counts==null? 0 : counts.getOrDefault(cls, 0);
                count += 1; // Laplace
                int vocab = featureCounts.get(i).keySet().size();
                posterior *= (double)count / (classCounts.get(cls) + vocab);
            }
            System.out.printf("P(%s) = %.5f%n", cls, posterior);
        }
    }
}
EOF
javac PlayCricketBayes.java
java PlayCricketBayes


Java Bayes Class Probabilities:
P(No) = 0.01822
P(Yes) = 0.00708


In [3]:
# Python verification with correct encoding
import pandas as pd
from sklearn.naive_bayes import CategoricalNB
from sklearn.preprocessing import LabelEncoder

# Play Cricket dataset
df = pd.DataFrame({
    'Outlook':    ['Sunny','Sunny','Overcast','Rain','Rain','Rain','Overcast','Sunny','Sunny','Rain','Sunny','Overcast','Overcast','Rain'],
    'Temperature':['Hot','Hot','Hot','Mild','Cool','Cool','Cool','Mild','Cool','Mild','Mild','Mild','Hot','Mild'],
    'Humidity':   ['High','High','High','High','Normal','Normal','Normal','High','Normal','Normal','Normal','High','Normal','High'],
    'Wind':       ['Weak','Strong','Weak','Weak','Weak','Strong','Strong','Weak','Weak','Weak','Strong','Strong','Weak','Strong'],
    'PlayCricket':['No','No','Yes','Yes','Yes','No','Yes','No','Yes','Yes','Yes','Yes','Yes','No']
})

# Separate features and target
X = df.drop('PlayCricket', axis=1)
y = df['PlayCricket']

# Create a LabelEncoder for each feature column
encoders = {}
for col in X.columns:
    le = LabelEncoder()
    X[col] = le.fit_transform(X[col])
    encoders[col] = le

# Encode target
le_target = LabelEncoder()
y = le_target.fit_transform(y)

# Train CategoricalNB
model = CategoricalNB(alpha=1.0)
model.fit(X, y)

# Prepare sample and transform using stored encoders
sample = pd.DataFrame([['Sunny','Cool','High','Strong']], columns=X.columns)
for col, le in encoders.items():
    sample[col] = le.transform(sample[col])

# Predict probabilities
probs = model.predict_proba(sample)[0]
classes = model.classes_
print("Python Class Probabilities:")
for cls_idx, p in zip(classes, probs):
    print(f"{le_target.inverse_transform([cls_idx])[0]}: {p:.5f}")


Python Class Probabilities:
No: 0.72007
Yes: 0.27993
