In [None]:
import numpy as np
import matplotlib.pyplot as plt
from statistics import stdev
from sklearn.datasets import make_circles
import sympy as sp

def gen_data(seed,mean,sd,count):
    """Draw `count` samples from a normal distribution.

    Parameters:
        seed:  value passed to ``np.random.seed``; note that this reseeds
               NumPy's *global* generator as a side effect.
        mean:  centre (``loc``) of the distribution.
        sd:    standard deviation (``scale``) of the distribution.
        count: number of samples to draw.

    Returns:
        The array produced by ``np.random.normal`` for these arguments.
    """
    np.random.seed(seed)
    return np.random.normal(loc=mean, scale=sd, size=count)

def plot_hist(x,y):
    """Overlay outline ("step") histograms of two samples on the current axes.

    `x` is drawn in red and `y` in blue. Nothing is returned and
    ``plt.show()`` is left to the caller.
    """
    for sample, colour in ((x, 'red'), (y, 'blue')):
        plt.hist(sample, histtype='step', stacked=True, fill=False, color=colour)
    
def likelihood(data,x):
    """Evaluate at `x` the normal density fitted to `data`.

    The density's mean is the arithmetic mean of `data` and its spread is the
    sample standard deviation (``statistics.stdev``).

    NOTE: a later cell of this notebook defines another function called
    ``likelihood`` with a different signature; once that cell has run, this
    1-D version is no longer reachable under this name.
    """
    mu = sum(data)/len(data)
    sigma = stdev(data)

    # Standardised distance of x from the fitted mean.
    z = (x-mu)/sigma
    return np.exp(-(1/2)*z**2)/(sigma*np.sqrt(2*np.pi))
    
def gx(data,data2,x,pc):
    """Log-posterior discriminant g(x) = ln p(x|c) + ln P(c) - ln p(x).

    Parameters:
        data:  samples of the class being scored.
        data2: samples of the competing class.
        x:     point at which to evaluate the discriminant.
        pc:    prior probability of the class of `data`; the competing class
               is given prior ``1 - pc`` (two-class problem).

    Returns:
        The natural log of the posterior probability of the class of `data`
        at `x`, under Gaussian class-conditional densities fitted to the
        samples.
    """
    mean = sum(data)/len(data)
    sd = stdev(data)

    # Evidence p(x): each class likelihood is weighted by its OWN prior.
    # (Weighting both by `pc` makes the prior cancel out of the result.)
    px = likelihood(data,x)*pc + likelihood(data2,x)*(1-pc)

    # ln of the Gaussian density N(mean, sd**2) evaluated at x.
    log_density = (-((x-mean)**2)/(2*sd**2))-(np.log(2*np.pi)/2)-np.log(sd)

    return log_density+np.log(pc)-np.log(px)

def posterior(data1,data2,x,pc):
    """Posterior probability P(class of data1 | x) for a two-class problem.

    Parameters:
        data1: samples of the class whose posterior is returned.
        data2: samples of the other class.
        x:     point at which to evaluate the posterior.
        pc:    prior probability of the class of `data1`; the other class is
               given prior ``1 - pc``.

    Returns:
        likelihood(data1, x) * pc divided by the evidence p(x).

    NOTE: a later cell of this notebook defines another ``posterior`` with a
    different signature; once that cell has run, this 1-D version is shadowed.
    """
    joint1 = likelihood(data1,x)*pc
    # The second class must be weighted by its own prior, not by `pc`;
    # otherwise the prior cancels and class frequencies are ignored.
    joint2 = likelihood(data2,x)*(1-pc)

    return joint1/(joint1+joint2)

def plot_posterior(data1,data2):
    """Plot the posterior curve of each class over the range of the pooled data.

    Priors are the class frequencies (printed as a side effect). The curve for
    `data1` is red, the one for `data2` is blue, and the y-axis is fixed to
    [0, 1.2]. ``plt.show()`` is left to the caller.
    """
    pooled = np.concatenate((data1,data2))
    pc1 = len(data1)/len(pooled)
    pc2 = len(data2)/len(pooled)
    print(pc1,pc2)

    # np.linspace default: 50 evenly spaced points between the extremes.
    grid = np.linspace(min(pooled),max(pooled))
    curve1 = [posterior(data1,data2,point,pc1) for point in grid]
    curve2 = [posterior(data2,data1,point,pc2) for point in grid]

    plt.plot(grid,curve1,color='red')
    plt.plot(grid,curve2,color='blue')
    plt.ylim([0,1.2])
    
def plot_likelihood(data1,data2):
    """Plot the fitted class-conditional density of each sample.

    Both curves are evaluated on 50 evenly spaced points spanning the pooled
    data; `data1` is red, `data2` is blue, and the y-axis is fixed to
    [0, 1.2]. ``plt.show()`` is left to the caller.
    """
    pooled = np.concatenate((data1,data2))
    grid = np.linspace(min(pooled),max(pooled))

    for sample, colour in ((data1, 'red'), (data2, 'blue')):
        density = [likelihood(sample,point) for point in grid]
        plt.plot(grid,density,color=colour)

    plt.ylim([0,1.2])
    
def plot_decision_boundary(data1,data2):
    """Draw vertical lines where the two Gaussian class posteriors are equal.

    Each class is modelled as a normal distribution fitted to its sample, with
    the class frequency as prior. The boundary solves

        -(x-m1)^2/(2 s1^2) - ln s1 + ln P1 = -(x-m2)^2/(2 s2^2) - ln s2 + ln P2

    Only real solutions strictly inside the range of the pooled data are
    drawn. Each line spans y in [0, 50] on the current axes; ``plt.show()``
    is left to the caller.
    """
    data = np.concatenate((data1,data2))
    lower, upper = min(data), max(data)

    mean1 = sum(data1)/len(data1)
    mean2 = sum(data2)/len(data2)

    sd1 = stdev(data1)
    sd2 = stdev(data2)

    prior1 = len(data1)/len(data)
    prior2 = len(data2)/len(data)

    x = sp.Symbol('x')
    y = np.linspace(0,50)

    # Constant part of the log-posterior difference. Without it the boundary
    # is only correct when both classes share the same spread and prior.
    offset = float(np.log(sd2/sd1)+np.log(prior1/prior2))

    equation = sp.Eq(-((x-mean1)**2)/(2*sd1**2)+((x-mean2)**2)/(2*sd2**2)+offset,0)
    for root in sp.solve(equation,x):
        # The quadratic can have complex roots (the posteriors never cross);
        # those cannot be ordered against the data range, so skip them.
        if not root.is_real:
            continue
        position = float(root)
        if lower < position < upper:
            plt.plot([position for _ in range(len(y))],y)


# Two 1-D samples of 200 points each: x uses (mean, sd), y uses (mean+2, sd+2),
# generated with consecutive seeds.
mean, sd, seed = 0, 1, 6
x = gen_data(seed=seed, mean=mean, sd=sd, count=200)
y = gen_data(seed=seed+1, mean=mean+2, sd=sd+2, count=200)

# Optional (disabled): plot_hist(x,y) and/or plot_decision_boundary(x,y)
# can be called before any of the plt.show() calls below.

# Fitted class-conditional densities.
plot_likelihood(x,y)
plt.show()

# Posterior probability of each class.
plot_posterior(x,y)
plt.show()

In [None]:
from sklearn.datasets import make_circles, make_classification, make_moons
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis,LinearDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression
from statistics import stdev,variance,covariance
import plotly.graph_objects as go
import sympy as sp
import numpy as np
import matplotlib.pyplot as plt

def adjust_mean_std(X, y, desired_mean, desired_std):
    """Rescale the rows of each class (labels 0 and 1) of `X`, in place.

    For each class, every feature column is shifted and scaled so that its
    mean becomes `desired_mean` and its population standard deviation
    (``ndarray.std``) becomes `desired_std`.

    Parameters:
        X: 2-D array of samples; modified in place.
           NOTE(review): presumably a float array - assigning the rescaled
           values into an integer array would truncate them.
        y: class label per row of `X` (array-like of 0/1).
        desired_mean: target mean (scalar or per-feature array).
        desired_std:  target standard deviation (scalar or per-feature array).

    Returns:
        `X` itself, for convenience (the adjustment is done in place).
    """
    # Accept plain lists: `list == 0` would be a single bool, not a mask.
    y = np.asarray(y)

    for label in (0, 1):
        mask = y == label
        members = X[mask]
        centre = members.mean(axis=0)
        spread = members.std(axis=0)
        # A constant feature has zero spread; dividing by it would produce
        # nan/inf. Using 1 maps such a feature exactly onto desired_mean.
        spread = np.where(spread == 0, 1.0, spread)
        X[mask] = (members - centre) * (desired_std / spread) + desired_mean

    return X

def _plot_discriminant_regions(model, title, x, y):
    """Fit `model` on (x, y), draw its predicted class regions, return the grid.

    Returns:
        (Z1, xx, yy, x_min, x_max, y_min, y_max) where `xx`/`yy` form a mesh
        with step 0.01 covering the data plus a margin of 1 on every side,
        and `Z1` holds the predicted label at every mesh point.
    """
    model.fit(x, y)

    # Mesh grid for visualisation.
    x_min, x_max = np.min(x[:, 0]) - 1, np.max(x[:, 0]) + 1
    y_min, y_max = np.min(x[:, 1]) - 1, np.max(x[:, 1]) + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.01), np.arange(y_min, y_max, 0.01))

    # Predict a label for every mesh point, then restore the mesh shape.
    Z1 = model.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.contourf(xx, yy, Z1, cmap=plt.cm.Paired, alpha=0.8)
    plt.scatter(x[:, 0], x[:, 1], c=y, cmap=plt.cm.Paired, edgecolors='k')
    plt.title(title)

    return Z1, xx, yy, x_min, x_max, y_min, y_max


def gen_dataset(mean,sd,size,seed,noise,factor,types,qorl):
    """Build a two-class 2-D dataset and plot a discriminant's decision regions.

    Parameters:
        mean:   four values - class-0 feature means (index 0, 1) followed by
                class-1 feature means (index 2, 3) for the Gaussian dataset.
        sd:     four standard deviations, laid out like `mean`.
        size:   two values - number of class-0 and class-1 Gaussian samples.
        seed:   seeds NumPy's global generator and the sklearn generators.
        noise:  `noise` argument for make_moons / make_circles.
        factor: `factor` argument for make_circles.
        types:  index of the dataset to use: 0 = moons, 1 = circles,
                2 = the Gaussian dataset built from mean/sd/size.
        qorl:   'q' for quadratic, 'l' for linear discriminant analysis.
                Any other value skips fitting and plotting.

    Returns:
        (class1, class2, Z1, xx, yy, x_min, x_max, y_min, y_max) where
        `class1`/`class2` are the rows labelled 0/1. When `qorl` is not
        'q' or 'l', `Z1`, `xx`, `yy` are None and the four bounds are 0.
    """
    np.random.seed(seed)

    # Draw order is kept (class 0: x1, x2; class 1: x1, x2) so a given seed
    # reproduces the same points.
    x1_class_0 = np.random.normal(loc=mean[0], scale=sd[0], size=size[0])
    x2_class_0 = np.random.normal(loc=mean[1], scale=sd[1], size=size[0])
    X_class_0 = np.column_stack((x1_class_0, x2_class_0))
    y_class_0 = np.zeros(size[0], dtype=int)  # class label 0

    x1_class_1 = np.random.normal(loc=mean[2], scale=sd[2], size=size[1])
    x2_class_1 = np.random.normal(loc=mean[3], scale=sd[3], size=size[1])
    X_class_1 = np.column_stack((x1_class_1, x2_class_1))
    y_class_1 = np.ones(size[1], dtype=int)  # class label 1

    # Combine the two classes.
    X = np.vstack((X_class_0, X_class_1))
    y = np.concatenate((y_class_0, y_class_1))
    linearly_separable = (X, y)

    datasets = [
        make_moons(noise=noise, random_state=seed),
        make_circles(noise=noise, factor=factor, random_state=seed),
        linearly_separable,
    ]

    x, y = datasets[types][0], datasets[types][1]
    class1 = x[y==0]
    class2 = x[y==1]

    discriminants = {
        'q': (QuadraticDiscriminantAnalysis, 'Quadratic discriminant'),
        'l': (LinearDiscriminantAnalysis, 'Linear discriminant'),
    }

    # Defaults returned when `qorl` selects no model. Z1 must be bound here:
    # previously an unrecognised `qorl` crashed at the return statement.
    Z1 = None
    xx,yy = None,None
    x_min, x_max = 0,0
    y_min, y_max = 0,0
    if qorl in discriminants:
        model_class, title = discriminants[qorl]
        Z1,xx,yy,x_min,x_max,y_min,y_max = _plot_discriminant_regions(model_class(), title, x, y)

    plt.show()

    return class1,class2,Z1,xx,yy,x_min, x_max,y_min, y_max
    
def covariance_matrix(class1):
    x = [i[0] for i in class1]
    y = [i[1] for i in class1]
    
    varx = variance(x)
    vary = variance(y)
    covxy = covariance(x,y)
    covyx = covariance(y,x)
    
    cov_matrix = np.matrix([[varx,covxy],[covyx,vary]])
    return cov_matrix
    
    
def likelihood(class1,allclass,qorl,x1,x2):
    """Evaluate at (x1, x2) the bivariate normal density fitted to `class1`.

    The mean is the per-coordinate average of `class1`. The covariance is
    ``covariance_matrix(class1)``, unless `qorl` is 'l', in which case
    ``covariance_matrix(allclass)`` is used instead.

    NOTE(review): in 'l' mode the covariance is that of all points taken
    together, which is not the same as a within-class pooled covariance -
    confirm this is the intended "shared covariance".

    NOTE: this definition reuses the name of the 1-D ``likelihood`` from an
    earlier cell and replaces it once this cell has run.

    Returns:
        The density value as a Python scalar.
    """
    firsts = [p[0] for p in class1]
    seconds = [p[1] for p in class1]

    point = np.matrix([x1,x2]).transpose()
    mu = np.matrix([sum(firsts)/len(firsts),sum(seconds)/len(seconds)]).transpose()

    cov_mat = covariance_matrix(class1)
    if qorl == 'l':
        cov_mat = covariance_matrix(allclass)

    precision = np.linalg.inv(cov_mat)

    # Expanded form of -1/2 (x - mu)^T S^-1 (x - mu).
    expo = (-1/2)*point.transpose()*precision*point+mu.transpose()*precision*point-mu.transpose()*precision*mu/2
    density = (1/(2*np.pi*np.sqrt(np.linalg.det(cov_mat))))*np.exp(expo)
    return density.item(0)

def posterior(data1,data2,qorl,x1,x2):
    """Posterior probability P(class of data1 | (x1, x2)) for two classes.

    Parameters:
        data1: points of the class whose posterior is returned.
        data2: points of the other class.
        qorl:  forwarded to ``likelihood`` ('l' selects the shared covariance).
        x1, x2: coordinates of the query point.

    Class priors are the class frequencies in the combined sample.

    NOTE: this definition reuses the name of the 1-D ``posterior`` from an
    earlier cell and replaces it once this cell has run.
    """
    data = np.concatenate((data1,data2))
    prior1 = len(data1)/len(data)
    prior2 = len(data2)/len(data)

    joint1 = likelihood(data1,data,qorl,x1,x2)*prior1
    # Weight the second class by its own prior; reusing class 1's prior makes
    # the priors cancel, so unequal class sizes would be ignored.
    joint2 = likelihood(data2,data,qorl,x1,x2)*prior2

    return joint1/(joint1+joint2)

def likelihood1(mean,cov1,x1,x2):
    """Bivariate normal density with the given parameters, evaluated at (x1, x2).

    Parameters:
        mean: sequence whose first two entries are the mean vector.
        cov1: 2x2 covariance matrix (anything ``np.matrix`` accepts).
        x1, x2: coordinates of the query point.

    Returns:
        The density value as a Python scalar.
    """
    point = np.matrix([x1,x2]).transpose()
    mu = np.matrix([mean[0],mean[1]]).transpose()
    cov_mat = np.matrix(cov1)
    precision = np.linalg.inv(cov_mat)

    # Expanded form of -1/2 (x - mu)^T S^-1 (x - mu).
    expo = (-1/2)*point.transpose()*precision*point+mu.transpose()*precision*point-mu.transpose()*precision*mu/2
    scale = 1/(2*np.pi*np.sqrt(np.linalg.det(cov_mat)))
    return (scale*np.exp(expo)).item(0)

def posterior1(mean,mean2,cov1,cov2,x1,x2):
    """Posterior of the (mean, cov1) Gaussian at (x1, x2) against (mean2, cov2).

    Both classes are weighted equally: the result is the first density
    divided by the sum of the two densities.
    """
    own = likelihood1(mean,cov1,x1,x2)
    rival = likelihood1(mean2,cov2,x1,x2)
    return own/(own+rival)

def gx1(mean1,mean2,cov1,cov2,x1,x2,prior1=0.5):
    """Log-posterior discriminant of the (mean1, cov1) Gaussian at (x1, x2).

    Computes g(x) = ln p(x|c1) + ln P(c1) - ln p(x), where the evidence is
    p(x) = P(c1) p(x|c1) + P(c2) p(x|c2) and P(c2) = 1 - P(c1).

    Parameters:
        mean1, cov1: mean vector and covariance of the class being scored.
        mean2, cov2: mean vector and covariance of the competing class.
        x1, x2:      coordinates of the query point.
        prior1:      prior probability of the scored class (default 0.5,
                     i.e. equal priors).

    Returns:
        g(x) rounded to 4 decimal places.
    """
    prior2 = 1-prior1
    # The evidence must carry the priors too; leaving them out while still
    # adding ln(prior) shifts the result by a constant ln(prior).
    px = prior1*likelihood1(mean1,cov1,x1,x2)+prior2*likelihood1(mean2,cov2,x1,x2)

    xy = np.matrix([x1,x2]).transpose()
    mean = np.matrix([mean1[0],mean1[1]]).transpose()
    cov_mat = np.matrix(cov1)
    precision = np.linalg.inv(cov_mat)

    # Expanded form of -1/2 (x - mu)^T S^-1 (x - mu).
    expo = (-1/2)*xy.transpose()*precision*xy+mean.transpose()*precision*xy-mean.transpose()*precision*mean/2
    y = expo - np.log(2*np.pi) - np.log(np.linalg.det(cov_mat))/2 + np.log(prior1) - np.log(px)
    return round(y.item(0),4)

In [None]:
# Argument order: gen_dataset(mean,sd,size,seed,noise,factor,types,qorl)
qorl = 'q'
class1,class2,zz,xx,yy,x_min, x_max,y_min, y_max = gen_dataset(
    [1.5,0,1,0], [2,1,1,2], [50,50], 1, 0.2, 0.5, 2, qorl)
allclass = np.concatenate((class1,class2))

# Class sizes, per-class sample means, and the covariance matrices in use.
print(len(class1),len(class2))
for members in (class1, class2):
    print([sum([pt[0] for pt in members])/len(members),sum([pt[1] for pt in members])/len(members)])
if qorl != 'q':
    print(covariance_matrix(allclass))
else:
    print(covariance_matrix(class1))
    print(covariance_matrix(class2))

# 50 evenly spaced coordinates per axis across the plotted region.
x1 = list(np.linspace(x_min,x_max))
x2 = list(np.linspace(y_min,y_max))

# Likelihoods / posteriors along the diagonal (x1[k], x2[k]).
diagonal = list(zip(x1, x2))
z1 = [likelihood(class1,allclass,qorl,a,b) for a, b in diagonal]
z2 = [likelihood(class2,allclass,qorl,a,b) for a, b in diagonal]
p1 = [posterior(class1,class2,qorl,a,b) for a, b in diagonal]
p2 = [posterior(class2,class1,qorl,a,b) for a, b in diagonal]

X, Y = np.meshgrid(x1, x2)

# Likelihood and posterior surfaces over the full grid.
Z1 = np.zeros_like(X)
Z2 = np.zeros_like(X)
P1 = np.zeros_like(X)
P2 = np.zeros_like(X)
for j, i in np.ndindex(X.shape):
    Z1[j, i] = likelihood(class1,allclass,qorl, X[j, i], Y[j, i])
    Z2[j, i] = likelihood(class2,allclass,qorl, X[j, i], Y[j, i])
    P1[j, i] = posterior(class1,class2,qorl, X[j, i], Y[j, i])
    P2[j, i] = posterior(class2,class1,qorl, X[j, i], Y[j, i])

# One figure per quantity: class 1 in blue, class 2 in red, and the
# discriminant's predicted labels (zz on the fine xx/yy mesh) in purple.
for title, surface1, surface2 in (('Likelihood', Z1, Z2), ('posterior', P1, P2)):
    fig = go.Figure(data=[
        go.Surface(z=surface1, x=X, y=Y, colorscale='Blues'),
        go.Surface(z=surface2, x=X, y=Y, colorscale='Reds'),
        go.Surface(z=zz, x=xx, y=yy, colorscale='Purples'),
    ])
    fig.update_layout(title=title, autosize=False,
                      width=500, height=500,
                      margin=dict(l=65, r=50, b=65, t=90))
    fig.show()

In [None]:

# Alternative parameter set (disabled):
# cov1 = [[0.5,0],[0,0.25]]; cov2 = [[0.25,0],[0,0.5]]
# mean1 = [2,0];             mean2 = [1,0]

# Two unit-covariance Gaussians centred at (-1,-1) and (1,1).
cov1 = [[1,0],
        [0,1]]
cov2 = [[1,0],
        [0,1]]

mean1 = [-1,-1]
mean2 = [1,1]

# 50 evenly spaced coordinates per axis on [-4, 4].
x1 = list(np.linspace(-4,4))
x2 = list(np.linspace(-4,4))

# Likelihoods / posteriors along the diagonal (x1[k], x2[k]).
diagonal = list(zip(x1, x2))
z1 = [likelihood1(mean1,cov1,a,b) for a, b in diagonal]
z2 = [likelihood1(mean2,cov2,a,b) for a, b in diagonal]
p1 = [posterior1(mean1,mean2,cov1,cov2,a,b) for a, b in diagonal]
p2 = [posterior1(mean2,mean1,cov2,cov1,a,b) for a, b in diagonal]

X, Y = np.meshgrid(x1, x2)

# Likelihood (Z), posterior (P) and decision (B) surfaces over the full grid.
Z1 = np.zeros_like(X)
Z2 = np.zeros_like(X)

P1 = np.zeros_like(X)
P2 = np.zeros_like(X)

B = np.zeros_like(X)

for j, i in np.ndindex(X.shape):
    Z1[j, i] = likelihood1(mean1,cov1, X[j, i], Y[j, i])
    Z2[j, i] = likelihood1(mean2,cov2, X[j, i], Y[j, i])
    P1[j, i] = posterior1(mean1,mean2,cov1,cov2, X[j, i], Y[j, i])
    P2[j, i] = posterior1(mean2,mean1,cov2,cov1, X[j, i], Y[j, i])
    # Decision surface: 0 where class 1 is at least as probable, else 1.
    B[j, i] = 0 if P1[j, i] >= P2[j, i] else 1

# One figure per quantity: class 1 in blue, class 2 in red, decision in purple.
for title, surface1, surface2 in (('Likelihood', Z1, Z2), ('posterior', P1, P2)):
    fig = go.Figure(data=[
        go.Surface(z=surface1, x=X, y=Y, colorscale='Blues'),
        go.Surface(z=surface2, x=X, y=Y, colorscale='Reds'),
        go.Surface(z=B, x=X, y=Y, colorscale='Purples'),
    ])
    fig.update_layout(title=title, autosize=False,
                      width=500, height=500,
                      margin=dict(l=65, r=50, b=65, t=90))
    fig.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import make_circles, make_classification, make_moons
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis,LinearDiscriminantAnalysis

# Dataset settings.
noise = 0.2
seed = 2
factor = 0.1

# Two-feature classification problem, jittered with uniform noise in [0, 2).
X, y = make_classification(
    n_features=2,
    n_redundant=0,
    n_informative=2,
    random_state=seed,
    n_clusters_per_class=1,
)
rng = np.random.RandomState(2)
X += 2 * rng.uniform(size=X.shape)
linearly_separable = (X, y)

datasets = [
    make_moons(noise=noise, random_state=seed),
    make_circles(noise=noise, factor=factor, random_state=seed),
    linearly_separable,
]
raw = datasets[2]
data1, labels = raw[0], raw[1]

# Quadratic feature expansion: [a, b, a*b, a^2, b^2].
data = np.column_stack((data1, data1[:, 0] * data1[:, 1], data1**2))

# Logistic regression on the expanded features.
logistic_reg = LogisticRegression()
logistic_reg.fit(data, labels)

# Mesh grid over the two original features (margin 1, step 0.01).
x_min, x_max = np.min(data[:, 0]) - 1, np.max(data[:, 0]) + 1
y_min, y_max = np.min(data[:, 1]) - 1, np.max(data[:, 1]) + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.01), np.arange(y_min, y_max, 0.01))

# Expand every mesh point the same way as the training data, then predict
# and reshape the labels back to the mesh shape.
flat_a, flat_b = xx.ravel(), yy.ravel()
mesh_data = np.column_stack((flat_a, flat_b, flat_a*flat_b, flat_a**2, flat_b**2))
Z = logistic_reg.predict(mesh_data).reshape(xx.shape)

# Decision regions with the training points on top.
plt.figure(figsize=(8, 6))
plt.contourf(xx, yy, Z, cmap=plt.cm.Paired, alpha=0.8)
plt.scatter(data[:, 0], data[:, 1], c=labels, cmap=plt.cm.Paired, edgecolors='k')
plt.title('Decision Boundary for Logistic Regression')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.show()