In [1]:
# Naive Bayes from Scratch for Student Placement Prediction

from collections import Counter, defaultdict

# -------------------------
# Training Dataset
# -------------------------
data = [
    ("High", "Yes", "Good", "Placed"),
    ("Medium", "Yes", "Good", "Placed"),
    ("Low", "No", "Poor", "Not Placed"),
    ("Medium", "No", "Average", "Not Placed"),
    ("High", "Yes", "Average", "Placed"),
    ("Low", "No", "Poor", "Not Placed"),
    ("High", "No", "Good", "Placed"),
    ("Medium", "No", "Poor", "Not Placed"),
    ("High", "Yes", "Good", "Placed"),
    ("Low", "No", "Average", "Not Placed")
]

# -------------------------
# Step 1: Count Class Labels
# -------------------------
labels = [row[3] for row in data]
class_count = Counter(labels)
total_samples = len(data)

# -------------------------
# Step 2: Calculate Priors
# -------------------------
priors = {}
for label in class_count:
    priors[label] = class_count[label] / total_samples

# -------------------------
# Step 3: Calculate Conditional Probabilities
# -------------------------
conditional = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))

for cgpa, internship, communication, label in data:
    conditional[label]["CGPA"][cgpa] += 1
    conditional[label]["Internship"][internship] += 1
    conditional[label]["Communication"][communication] += 1

# Convert frequencies into probabilities
for label in conditional:
    for feature in conditional[label]:
        total = sum(conditional[label][feature].values())
        for value in conditional[label][feature]:
            conditional[label][feature][value] /= total

# -------------------------
# Step 4: Prediction Function
# -------------------------
def predict(sample):
    cgpa, internship, communication = sample
    probabilities = {}

    for label in priors:
        prob = priors[label]
        prob *= conditional[label]["CGPA"].get(cgpa, 0)
        prob *= conditional[label]["Internship"].get(internship, 0)
        prob *= conditional[label]["Communication"].get(communication, 0)
        probabilities[label] = prob

    return max(probabilities, key=probabilities.get), probabilities

# -------------------------
# Step 5: Test with New Input
# -------------------------
test_sample = ("High", "Yes", "Good")
result, values = predict(test_sample)

print("Test Student Details:", test_sample)
print("Class Probabilities:", values)
print("Predicted Class:", result)


Test Student Details: ('High', 'Yes', 'Good')
Class Probabilities: {'Placed': 0.25600000000000006, 'Not Placed': 0.0}
Predicted Class: Placed


In [1]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# -----------------------------
# Dataset
# -----------------------------
reviews = [
    "movie was amazing",
    "film was very good",
    "acting was excellent",
    "excellent direction",
    "very good acting",
    "story was boring",
    "movie was terrible",
    "film was bad",
    "boring movie",
    "bad story"
]

labels = [
    "Positive", "Positive", "Positive", "Positive", "Positive",
    "Negative", "Negative", "Negative", "Negative", "Negative"
]

# -----------------------------
# Convert Text to Bag-of-Words
# -----------------------------
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(reviews)

# -----------------------------
# Train-Test Split
# -----------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, labels, test_size=0.2, random_state=42
)

# -----------------------------
# Train Naïve Bayes Model
# -----------------------------
model = MultinomialNB()
model.fit(X_train, y_train)

# -----------------------------
# Prediction
# -----------------------------
y_pred = model.predict(X_test)

# -----------------------------
# Accuracy
# -----------------------------
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# -----------------------------
# Test with New Sentence
# -----------------------------
test_sentence = ["excellent movie"]
test_vector = vectorizer.transform(test_sentence)
prediction = model.predict(test_vector)

print("Test Sentence:", test_sentence[0])
print("Predicted Class:", prediction[0])


Accuracy: 1.0
Test Sentence: excellent movie
Predicted Class: Positive


In [20]:
from transformers import pipeline

# Force Hugging Face to use PyTorch (not TensorFlow)
classifier = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
    framework="pt"
)

text = "Not a good movie"

result = classifier(text)

print("Text:", text)
print("Prediction:", result[0]["label"], "(Score:", round(result[0]["score"], 2), ")")


Device set to use cpu


Text: Not a good movie
Prediction: NEGATIVE (Score: 1.0 )


In [11]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [12]:
# -----------------------------
# Load Dataset from CSV
# -----------------------------
data = pd.read_csv("reviews.csv")

reviews = data["text"]
labels = data["label"]

In [13]:
# -----------------------------
# Convert Text to Bag-of-Words
# -----------------------------
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(reviews)

In [14]:
# -----------------------------
# Train-Test Split
# -----------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, labels, test_size=0.2, random_state=42
)

In [15]:
# -----------------------------
# Train Naïve Bayes Model
# -----------------------------
model = MultinomialNB()
model.fit(X_train, y_train)

In [16]:
# -----------------------------
# Prediction on Test Data
# -----------------------------
y_pred = model.predict(X_test)

In [17]:
# -----------------------------
# Accuracy
# -----------------------------
accuracy = accuracy_score(y_test, y_pred)
print("Model Accuracy:", accuracy)

Model Accuracy: 1.0


In [19]:
# -----------------------------
# Test with New Sentence
# -----------------------------
test_sentence = ["Terrible story"]
test_vector = vectorizer.transform(test_sentence)
prediction = model.predict(test_vector)

print("Test Sentence:", test_sentence[0])
print("Predicted Class:", prediction[0])

Test Sentence: Terrible story
Predicted Class: Negative
