## Reading data

In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv('data 6.csv')

In [3]:
df

Unnamed: 0,Refund,Marital_Status,Taxable_Income,Evade
0,Yes,Single,125000,No
1,No,Married,100000,No
2,No,Single,70000,No
3,Yes,Married,120000,No
4,No,Divorced,95000,Yes
5,No,Married,60000,No
6,Yes,Divorced,220000,No
7,No,Single,85000,Yes
8,No,Married,75000,No
9,No,Single,90000,Yes


## Function definitions for probability and normalization

In [4]:
def p_x_given_y(x, mean_y, variance_y): # normalize function
    
    p = 1/(np.sqrt(2*np.pi*variance_y)) * np.exp((-(x-mean_y)**2)/(2*variance_y))
    return p

def probability(column, status, evade, total_num): # single feature probability function
    
    condition1 = df[column] == status
    condition2 = df['Evade'] == evade
    result = condition1 & condition2
    return round((len(result[result==True])/total_num),3)

## Forecast column analysis

In [5]:
total_no = df['Evade'][df['Evade']=='No'].count()
total_yes = df['Evade'][df['Evade']=='Yes'].count()
total = df['Evade'].count()
probYes = total_yes/total
probNo = total_no/total

In [6]:
print("Total Evade: ", total)
print("out of which ", total_yes, " are Yes")
print("and ", total_no, " are No")
print("Probability Yes: ", probYes)
print("Probability No: ", probNo)

Total Evade:  10
out of which  3  are Yes
and  7  are No
Probability Yes:  0.3
Probability No:  0.7


## Individual features probability

### Refund

In [7]:
probability("Refund", "Yes", "No", total_no) # p(Refund=Yes | Evade=No)

0.429

In [8]:
probability("Refund", "Yes", "Yes", total_yes) # p(Refund=Yes | Evade=Yes)

0.0

In [9]:
probability("Refund", "No", "No", total_no) # p(Refund=No | Evade=No)

0.571

In [10]:
probability("Refund", "No", "Yes", total_yes) # p(Refund=No | Evade=Yes)

1.0

### Marital Status

In [11]:
probability("Marital_Status", "Single", "Yes", total_yes) # p(Marital Status=Single | Evade=Yes)

0.667

In [12]:
probability("Marital_Status", "Single", "No", total_no) # p(Marital Status=Single | Evade=No)

0.286

In [13]:
probability("Marital_Status", "Married", "Yes", total_yes) # p(Marital Status=Married | Evade=Yes)

0.0

In [14]:
probability("Marital_Status", "Married", "No", total_no) # p(Marital Status=Married | Evade=No)

0.571

In [15]:
probability("Marital_Status", "Divorced", "Yes", total_yes) # p(Marital Status=Divorced | Evade=Yes)

0.333

In [16]:
probability("Marital_Status", "Divorced", "No", total_no) # p(Marital Status=Single | Evade=No)

0.143

### Taxable Income

In [17]:
data_means = df.groupby('Evade').mean() # Taxable income mean
data_means

Unnamed: 0_level_0,Taxable_Income
Evade,Unnamed: 1_level_1
No,110000
Yes,90000


In [18]:
data_variance = df.groupby('Evade').var() # Taxable income variance
data_variance

Unnamed: 0_level_0,Taxable_Income
Evade,Unnamed: 1_level_1
No,2975000000
Yes,25000000


In [19]:
# Means for Taxable_Income | Evade
tax_yes_mean = data_means['Taxable_Income'][data_variance.index == 'Yes'].values[0]
tax_no_mean = data_means['Taxable_Income'][data_variance.index == 'No'].values[0]

# Variance Taxable_Income | Evade
tax_yes_variance = data_variance['Taxable_Income'][data_variance.index == 'Yes'].values[0]
tax_no_variance = data_variance['Taxable_Income'][data_variance.index == 'No'].values[0]

In [20]:
p_x_given_y(125000, data_means['Taxable_Income'][1], data_variance['Taxable_Income'][1]) # Income=125000 | Evade=Yes

1.826944081672919e-15

In [21]:
p_x_given_y(125000, data_means['Taxable_Income'][0], data_variance['Taxable_Income'][0]) #Income=125000 | Evade=No

7.042772831514902e-06

## Generic Program

In [22]:
print("Enter Value for Refund, Marital Status and Taxable Income respectively: ")
refund = input("Refund: ")
marital_status = input("Marital_Status: ")
taxable_income = input("Taxable_Income: ")
# please note that this program does not take debuggin into account hence all
# input is case and syntax sensitive.

Enter Value for Refund, Marital Status and Taxable Income respectively: 
Refund: Yes
Marital_Status: Divorced
Taxable_Income: 90000


### Test case 1

In [23]:
# calculating prediction for Yes
refund_prob = probability("Refund", refund, "Yes", total_yes)
marital_prob = probability("Marital_Status", marital_status, "Yes", total_yes)
tax_prob = p_x_given_y(int(taxable_income), data_means['Taxable_Income'][1], data_variance['Taxable_Income'][1])
yes = refund_prob * marital_prob * tax_prob

# calculating prediction for No
refund_prob = probability("Refund", refund, "No", total_no)
marital_prob = probability("Marital_Status", marital_status, "No", total_no)
tax_prob = p_x_given_y(int(taxable_income), data_means['Taxable_Income'][0], data_variance['Taxable_Income'][0])
no = refund_prob * marital_prob * tax_prob

print("Hence Yes = ", yes)
print("and No = ", no)

# test case
if(yes > no):
    print('Evade is Yes')
else:
    print('Evade is No')

Hence Yes =  0.0
and No =  4.195305995843191e-07
Evade is No


In [24]:
print("Enter Value for Refund, Marital Status and Taxable Income respectively: ")
refund = input("Refund: ")
marital_status = input("Marital_Status: ")
taxable_income = input("Taxable_Income: ")
# please note that this program does not take debuggin into account hence all
# input is case and syntax sensitive.

Enter Value for Refund, Marital Status and Taxable Income respectively: 
Refund: No
Marital_Status: Married
Taxable_Income: 60000


### Test case 2

In [25]:
# calculating prediction for Yes
refund_prob = probability("Refund", refund, "Yes", total_yes)
marital_prob = probability("Marital_Status", marital_status, "Yes", total_yes)
tax_prob = p_x_given_y(int(taxable_income), data_means['Taxable_Income'][1], data_variance['Taxable_Income'][1])
yes = refund_prob * marital_prob * tax_prob

# calculating prediction for No
refund_prob = probability("Refund", refund, "No", total_no)
marital_prob = probability("Marital_Status", marital_status, "No", total_no)
tax_prob = p_x_given_y(int(taxable_income), data_means['Taxable_Income'][0], data_variance['Taxable_Income'][0])
no = refund_prob * marital_prob * tax_prob

print("Hence Yes = ", yes)
print("and No = ", no)

# test case
if(yes > no):
    print('Evade is Yes')
else:
    print('Evade is No')

Hence Yes =  0.0
and No =  1.5666144362188615e-06
Evade is No
