In [1]:
import pandas as pd

data = {
    'age': ['<=30', '<=30', '31…40', '>40', '>40', '>40', '31…40', '<=30', '<=30', '>40',
            '<=30', '31…40', '31…40', '>40'],
    'income': ['high', 'high', 'high', 'medium', 'low', 'low', 'low', 'medium', 'low',
               'medium', 'medium', 'medium', 'high', 'medium'],
    'student': ['no', 'no', 'no', 'no', 'yes', 'yes', 'yes', 'no', 'yes', 'yes', 'yes',
                'no', 'yes', 'no'],
    'credit_rating': ['fair', 'excellent', 'fair', 'fair', 'fair', 'excellent', 'excellent',
                      'fair', 'fair', 'fair', 'excellent', 'excellent', 'fair', 'excellent'],
    'buys_computer': ['no', 'no', 'yes', 'yes', 'yes', 'no', 'yes', 'no', 'yes', 'yes',
                      'yes', 'yes', 'yes', 'no']
}

df = pd.DataFrame(data)

prior_prob = df['buys_computer'].value_counts(normalize=True)
print("Prior Probabilities for 'buys_computer' classes:")
print(prior_prob)


Prior Probabilities for 'buys_computer' classes:
buys_computer
yes    0.642857
no     0.357143
Name: proportion, dtype: float64


In [5]:
import pandas as pd

data = {
    'age': ['<=30', '<=30', '31…40', '>40', '>40', '>40', '31…40', '<=30', '<=30', '>40',
            '<=30', '31…40', '31…40', '>40'],
    'income': ['high', 'high', 'high', 'medium', 'low', 'low', 'low', 'medium', 'low',
               'medium', 'medium', 'medium', 'high', 'medium'],
    'student': ['no', 'no', 'no', 'no', 'yes', 'yes', 'yes', 'no', 'yes', 'yes', 'yes',
                'no', 'yes', 'no'],
    'credit_rating': ['fair', 'excellent', 'fair', 'fair', 'fair', 'excellent', 'excellent',
                      'fair', 'fair', 'fair', 'excellent', 'excellent', 'fair', 'excellent'],
    'buys_computer': ['no', 'no', 'yes', 'yes', 'yes', 'no', 'yes', 'no', 'yes', 'yes',
                      'yes', 'yes', 'yes', 'no']
}

df = pd.DataFrame(data)

class_conditional_densities = {}
for feature in df.columns[:-1]:
    class_conditional_densities[feature] = df.groupby(['buys_computer', feature]).size() / len(df)

for feature, density in class_conditional_densities.items():
    print(f"Class-Conditional Density for '{feature}':")
    print(density)
    print()

for feature, density in class_conditional_densities.items():
    if density.isnull().any():
        print(f"At least one class-conditional density for '{feature}' has zero values.")

Class-Conditional Density for 'age':
buys_computer  age  
no             <=30     0.214286
               >40      0.142857
yes            31…40    0.285714
               <=30     0.142857
               >40      0.214286
dtype: float64

Class-Conditional Density for 'income':
buys_computer  income
no             high      0.142857
               low       0.071429
               medium    0.142857
yes            high      0.142857
               low       0.214286
               medium    0.285714
dtype: float64

Class-Conditional Density for 'student':
buys_computer  student
no             no         0.285714
               yes        0.071429
yes            no         0.214286
               yes        0.428571
dtype: float64

Class-Conditional Density for 'credit_rating':
buys_computer  credit_rating
no             excellent        0.214286
               fair             0.142857
yes            excellent        0.214286
               fair             0.428571
dtype: float64



In [6]:
import pandas as pd
from scipy.stats import chi2_contingency

data = {
    'age': ['<=30', '<=30', '31…40', '>40', '>40', '>40', '31…40', '<=30', '<=30', '>40',
            '<=30', '31…40', '31…40', '>40'],
    'income': ['high', 'high', 'high', 'medium', 'low', 'low', 'low', 'medium', 'low',
               'medium', 'medium', 'medium', 'high', 'medium'],
    'student': ['no', 'no', 'no', 'no', 'yes', 'yes', 'yes', 'no', 'yes', 'yes', 'yes',
                'no', 'yes', 'no'],
    'credit_rating': ['fair', 'excellent', 'fair', 'fair', 'fair', 'excellent', 'excellent',
                      'fair', 'fair', 'fair', 'excellent', 'excellent', 'fair', 'excellent'],
    'buys_computer': ['no', 'no', 'yes', 'yes', 'yes', 'no', 'yes', 'no', 'yes', 'yes',
                      'yes', 'yes', 'yes', 'no']
}

df = pd.DataFrame(data)

# Create contingency table (cross-tabulation) for selected columns
#The chi-square test for independence is a statistical method used to determine if there is a significant association between categorical variables by comparing 
#observed and expected frequencies in a contingency table. If the test shows a low p-value (< 0.05), it suggests that the variables are dependent.
contingency_table = pd.crosstab(df['age'], [df['income'], df['student'], df['credit_rating'], df['buys_computer']])

# Perform chi-square test of independence
chi2, p, dof, expected = chi2_contingency(contingency_table)

# Display results
print(f"Chi-square Statistic: {chi2}")
print(f"P-value: {p}")
print(f"Degrees of Freedom: {dof}")


Chi-square Statistic: 25.200000000000003
P-value: 0.3949583040945451
Degrees of Freedom: 24


In [7]:
import pandas as pd
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Given data
data = {
    'age': ['<=30', '<=30', '31…40', '>40', '>40', '>40', '31…40', '<=30', '<=30', '>40', '<=30', '31…40', '31…40', '>40'],
    'income': ['high', 'high', 'high', 'medium', 'low', 'low', 'low', 'medium', 'low', 'medium', 'medium', 'medium', 'high', 'medium'],
    'student': ['no', 'no', 'no', 'no', 'yes', 'yes', 'yes', 'no', 'yes', 'yes', 'yes', 'no', 'yes', 'no'],
    'credit_rating': ['fair', 'excellent', 'fair', 'fair', 'fair', 'excellent', 'excellent', 'fair', 'fair', 'fair', 'excellent', 'excellent', 'fair', 'excellent'],
    'buys_computer': ['no', 'no', 'yes', 'yes', 'yes', 'no', 'yes', 'no', 'yes', 'yes', 'yes', 'yes', 'yes', 'no']
}

# Create a DataFrame
df = pd.DataFrame(data)

# Encoding string data into numerical data
le = LabelEncoder()
df_encoded = df.apply(le.fit_transform)

# Separate features and target variable
X = df_encoded.drop('buys_computer', axis=1)
y = df_encoded['buys_computer']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build a Naïve-Bayes classifier
model = GaussianNB()
model.fit(X_train, y_train)

# Make predictions on the test set
predictions = model.predict(X_test)

# Calculate accuracy of the predictions
accuracy = accuracy_score(y_test, predictions)

print("\nNaïve-Bayes Classifier Test Set Accuracy:", accuracy)



Naïve-Bayes Classifier Test Set Accuracy: 0.6666666666666666


In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the dataset
data = pd.read_csv('Telugu_Char_Gabor_.csv')

# Separate the features and labels
X = data.drop(columns=['ImageName','Label'])
y = data['Label']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a Gaussian Naive Bayes classifier
model = GaussianNB()

# Train the classifier
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the classifier
accuracy = accuracy_score(y_test, y_pred)

# Print the accuracy
print(accuracy)

0.23731884057971014
