In [2]:
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt
import seaborn as sns
from numpy.linalg import svd
from scipy import stats

In [3]:
# statistic 

def std(a):
    ar = np.array(a)
    std = np.std(ar)
    print("std is:",std)
    return std

def variance(a):
    ar = np.array(a)
    v = np.var(ar)
    print("var is:",v)
    return v

def mode(a):
    return stats.mode(a)[0][0]

def stat(data):
    n = len(data)
    q1 = math.floor(n*0.25)
    q3 = math.floor(n*0.75)
    q1_value=data[q1]
    q3_value=data[q3]
    iqr=q3_value-q1_value
    result ={
        "median":np.median(data),
        "mode":mode(data),
        "MIN":np.min(data),
        "MAX":np.max(data),
        "Q1":q1_value,
        "Q3":q3_value,
        "IQR":iqr,
    }

    return result
    
def outlier_det(data):
    sta = stat(data)
    iqr = sta["IQR"]
    min_edge = sta["Q1"]-1.5*iqr
    max_edge = sta["Q3"]+1.5*iqr
    result=[]
    print("最小边缘是:",min_edge)
    print("最大边缘是:",max_edge)
    for i in data:
        if i < min_edge or i > max_edge:
            result.append(i)
    print("outliers 有:",result)
    return result


In [4]:
# normalization: min-max,z-score,decimal scaling
def min_max(v,o_min,o_max,n_min,n_max):
    result = (((v-o_min)*(n_max-n_min))/(o_max-o_min))+n_min
    return result

def z_score(data,m=None,s=None):
    if m:
        me = m
    else:
        me = np.mean(data)
    if s:
        st = s
    else:
        st = std(data)
    result =[]
    for i in data:
        result.append(round((i-me)/st,3))

    return result

def decimal_scaling(data):
    result=[]
    maximum = np.max(data)
    bits = round(np.log10(maximum))
    for d in data:
        result.append(d/(10**bits))
    return result

data = [100,400,600,800,3000,4200]

# min_max(73000,12000,98000,0,1)
print(z_score(data))
print(decimal_scaling(data))

std is: 1527.9797846248562
[-0.927, -0.731, -0.6, -0.469, 0.971, 1.756]
[0.01, 0.04, 0.06, 0.08, 0.3, 0.42]


In [5]:
# binning equal-depth,equal-width
def equal_depth(data,num):
    n = len(data)
    each = n//num
    result=[]
    i=0
    while i<=n-4:
        result.append(data[i:i+each])
        i+=each
    print(result)
    return result

def equal_width(data,num):
    width = (np.max(data)-np.min(data))//num
    print("width:",width)
    edge=np.min(data)+width
    result=[[] for _ in range(num)]
    for i in range(num):
        for j in data:
            if edge>=j:
                result[i].append(j)
            else:
                edge = edge+width
                break
            
    return result

data = [8, 10, 15, 35, 50, 52, 85, 89, 92, 158, 201, 251]    
equal_width(data,3)

width: 81


[[8, 10, 15, 35, 50, 52, 85, 89],
 [8, 10, 15, 35, 50, 52, 85, 89, 92, 158],
 [8, 10, 15, 35, 50, 52, 85, 89, 92, 158, 201, 251]]

In [6]:
# distance & similarity
def Euclidean(x,y):
    x=np.array(x)
    y=np.array(y)
    distance = np.linalg.norm(np.subtract(x,y))
    return distance

def Manhattan(x,y):
    x=np.array(x)
    y=np.array(y)
    distance = np.linalg.norm(np.subtract(x,y),ord=1)
    return distance
    
def Supremum(x,y):
    x=np.array(x)
    y=np.array(y)
    distance = np.linalg.norm(np.subtract(x,y),ord=np.inf)
    return distance

def Minkowski(x,y,o):
    x=np.array(x)
    y=np.array(y)
    distance = np.linalg.norm(np.subtract(x,y),ord=o)
    return distance


def sim_mat(data):
    sm = [[0 for _ in range(len(data))] for _ in range(len(data))]
    dm = dis_mat(data,data)
    for i in range(len(data)):
        for j in range(i):
            sm[i][j]=1-dm[i][j]
    return sm
            

def dis_mat(data1,data2, dis_type=2,s=False):
    dm = [[0 for _ in range(len(data2))] for _ in range(len(data1))]
    for i in range(len(data1)):
        for j in range(i):
            if dis_type==1:
                dm[i][j]=round(Manhattan(data1[i],data2[j]),3)
            elif dis_type==2:
                dm[i][j]=round(Euclidean(data1[i],data2[j]),3)
            elif dis_type==3:
                dm[i][j]=round(Supremum(data1[i],data2[j]),3)
                
            if s:
                dm[j][i]=dm[i][j]
    return dm

def print_dm(dm):
    for i in dm:
        print(i)
    return True

def SMC(x,y):
    l = len(x)
    f_11=f_01=f_10=f_00=0
    for i in range(l):
        if x[i]==y[i] and x[i]==1:
            f_11+=1
        elif x[i]==y[i] and x[i]==0:
            f_00+=1
        else:
            if x[i]==0:
                f_01+=1
            else:
                f_10+=1
    similarity = (f_11+f_00)/(f_11+f_01+f_10+f_00)
    return similarity

def Jaccard(x,y):
    l = len(x)
    f_11=f_01=f_10=f_00=0
    for i in range(l):
        if x[i]==y[i] and x[i]==1:
            f_11+=1
        elif x[i]==y[i] and x[i]==0:
            f_00+=1
        else:
            if x[i]==0:
                f_01+=1
            else:
                f_10+=1
    similarity = (f_11)/(f_11+f_01+f_10)
    return similarity
    
    
def L2(v):
    return round(np.linalg.norm(v),3)

def cos_sim(v1,v2,d=3):
    top = dot_product(v1,v2)
    print("\n分子:",top)
    bottom = round(L2(v1)*L2(v2),d)
    print("分母:",bottom)
    result = round(top/bottom,d)
    print("cosine similarity:",result)
    return result

def dot_product(a,b):
    a=np.array(a)
    b=np.array(b)
    result = round(np.dot(a,b),3)
    return result

def EJ(x,y):
    top = dot_product(x,y)
    print("\n分子:",top)
    bottom = round(L2(x)+L2(y)-top,3)
    print("分母:",bottom)
    result = round(top/bottom,3)
    print("Extend Jaccard:",result)
    return result
    
def pearson(x,y):
    return np.corrcoef(x,y)

def correlation(x,y):
    n = len(x)
    r=0
    s_x=0
    s_y=0
    for i in range(n):
        r+=(x[i]-np.mean(x))*(y[i]-np.mean(y))
        s_x+=np.square(x[i]-np.mean(x))
        s_y+=np.square(y[i]-np.mean(y))
    cov = r/(n-1)
    print(cov)
    std_x = np.sqrt(s_x/(n-1))
    print(std_x)
    std_y = np.sqrt(s_y/(n-1))
    print(std_y)
    result = cov/(std_x*std_y)
    return result

def kevin_mean(x):
    r = []
    for i in x:
        if i != 0:
            r.append(i)
    result = np.mean(r)
    return result
    
def kevin_pearson(x,y):
    
    m1 = kevin_mean(x)
    m2 = kevin_mean(y)
    for i in range(len(x)):
        if x[i] !=0:
            x[i]=round(np.subtract(x[i],m1),1)

    for i in range(len(y)):
        if y[i] !=0:
            y[i]=round(np.subtract(y[i],m2),1)

    result = round(cos_sim(x,y),2)
    return result


def item_item(sims,rates):
    r = np.dot(sims,rates)
    result = r/np.sum(sims)
    return round(result,2)



# pearson(x,y)
# correlation(x,y)
# np.mean(x)

a = [4,0,0,5,1,0,0]
b = [0,0,0,2,4,5,0]
EJ(a,b)



分子: 14
分母: -0.811
Extend Jaccard: -17.263


-17.263

In [7]:
# entropy

def entropy(x):
    n = len(x)
    r=0
    s = np.sum(x)
    for i in range(n):
        p = x[i]/s
        r+=p*np.log2(p)
    result=round(-1*r,3)
    return result

# GINI index最小的切割
def gini(x):
    n = len(x)
    r = 0
    s = np.sum(x)
    for i in range(n):
        p = x[i]/s
        r+=np.square(p)
        
    
    result = round(1-r,3)
    return result

# 输入个数列表，如果是id，则全为1
def split_info(x):
    r=0
    n=len(x)
    s = np.sum(x)
    for i in range(n):
        p = x[i]/s
        r+=p*np.log2(p)
    result=-1*r
    return result


def error(x):
    s = np.sum(x)
    n = len(x)
    for i in range(n):
        x[i]/=s
    result = round(1-np.max(x),3)
    return result

# print(((4/20)*gini([1,3])+(8/20)*gini([1,0])+(8/20)*gini([1,7])))

In [8]:
# clustering

def update_center(a):
    m = np.mean(a,axis=0)
    x=round(m[0],3)
    y=round(m[1],3)
    result = [x,y]
    return result

def cent(x,center,cluster,first=True):
    C = center
    dm = [0 for _ in range(len(center))]
    for i in range(len(C)):
        dm[i]=np.around(Euclidean(x,C[i]))
    close = np.argmin(dm)
    print("\n当前点为:",x)
    print("对所有点的距离为",dm)
    print("所以最接近的集群为 {}, 中心为 {}".format(close+1,center[close]))
#     if first and dm[close]!=0:
    cluster[close].append(x)
#     center[close]=update_center(cluster[close])

    return cluster


def cxm_kmeans(data,center,k=3,first=True):
    if first:
        cluster = [[center[i]] for i in range(k)]
    else:
        cluster = [[] for _ in range(k)]

    for i in range(len(data)):
        x = data[i]
        cluster = cent(x,center,cluster,first)

    new_center = []
    for c in cluster:
        new_center.append(update_center(c))
    
    print(f"\n迭代完成后，新的中心为:\n{new_center}")
    print(f"\n结果为:\n{cluster}\n")
    return new_center
    

In [9]:
# DBSCAN

def group_average(dis,size):
    return np.sum(dis)/size
    
def dbscan(data,eps,minpoints):
    dm = dis_mat(data,data,2,True)
    print_dm(dm)
    n=len(data)
    count_dict={}
    cores = []
    boarder=[]
    noise = []
    for i in range(n):
        count_dict[i]=[]
        for j in range(n):
            if i==j:
                continue
            if dm[i][j]<=eps:
                count_dict[i].append(j+1)
#         print(count_dict[i])
        if len(count_dict[i])>=minpoints-1:
            cores.append(i+1)
        elif len(count_dict[i])<minpoints-1 and len(count_dict[i])>0:
            boarder.append(i+1)
        else:
            noise.append(i+1)
            
    return cores,boarder,noise
    
def data_plot(data):
    x=[]
    y=[]
    for i in data:
        x.append(i[0])
        y.append(i[1])

    # 生成图形
    plt.scatter(x, y) # 颜色绿色，点形圆形，线性虚线，设置图例显示内容，线条宽度为2

    plt.ylabel('y') # 横坐标轴的标题
    plt.xlabel('x') # 纵坐标轴的标题
    plt.xticks(np.arange(0, 11, 1)) # 设置横坐标轴的刻度为 0 到 10 的数组
    plt.yticks(np.arange(0, 11, 1))
#     plt.ylim([-2, 2]) # 设置纵坐标轴范围为 -2 到 2
#     plt.legend() # 显示图例, 图例中内容由 label 定义
    plt.grid() # 显示网格
    plt.title('CHEN XUMIN 19430019') # 图形的标题

    # 显示图形
    plt.show()



In [10]:
# 排列组合
def fac(num):
    factorial = 1
    for i in range(1,num+1):
         factorial = factorial*i
    return factorial

#括号形式的组合表示，top表示总数，bottom表示取多少个
def Combination(top,bottom):
    result = fac(top)/(fac(top-bottom)*fac(bottom))
    return result

print(Combination(10,5))
print(Combination(10,4))



252.0
210.0


In [11]:
# TF-IDF
def TF(v):
    tf = []
    for f in v:
        tf.append(round(f/np.max(v),3))
    print("normalized-tf:",tf)
    return tf

def IDF(N,DF):
    idf = []
    for df in DF:
        idf.append(round(np.log10(N/df),3))
    print("IDF:",idf)
    return idf

# 这里只计算了一行的tf-idf
def TF_IDF(v,df,N):
    tf = TF(v)
    idf = IDF(N,df)
    result=[]
    for i in range(len(df)):
        result.append(tf[i]*idf[i])
    print("tf-idf:",result)
    return result

def smoothing(f,dj,l):
    length=len(f)
    top = np.add(l , f)
    bottom = length*l+dj
    result = top/bottom
    return result


def str2index(data):
    n=len(data)
    m = []
    vocab={}
    for i in range(n):
        for j in data[i]:
            if j not in m:
                m.append(j)
                
    for i in range(len(m)):
        vocab[m[i]]=i
    print(vocab)
    result=[[0 for _ in range(len(m))]for _ in range(n)]
    
    for i in range(n):
        for j in data[i]:
            result[i][vocab[j]]+=1
            
    return result
            

In [12]:
# linear algebra
def Cov_matrix(X):
    mean = np.mean(X,axis=1)
    cov_m = np.cov(X)
    return cov_m

def EigDec(X):
    val,vec = np.linalg.eig(X)
    print("eigen value are:\n",val)
    print("max",max(val))
    print("eigen vector are:\n",vec)
    return val,vec

def PCA(X):
    m = Cov_matrix(X)
    val,vec = EigDec(m)
    return val,vec

def simple_svd(X):
    U,s,VT = svd(X)
    print("\n X = \n",X)
    print("\n U = \n",U)
    print("\n s = \n",s)
    print("\n VT = \n",VT)
    recovered_X = U.dot(np.diag(s)).dot(VT)
    return recovered_X

def kevin_rank(X):
    result=np.linalg.matrix_rank(X)
    return result


In [13]:
# Simplified PageRank by Power Iteration Method
# iteration 是迭代次数
# init是初始化的值，类型为列表

def PageRank_PI(A,r,iteration):
    
    for i in range(len(A)):
        A[i]=np.multiply(A[i],1/(np.sum(A[i])))
    
    M = np.array(A).T

    print(M)
    print(f"This iteration 0,new Page Rank is :\n{r}")
    r= np.array(r)
    for i in range(iteration):
        r= np.around(np.dot(M,r),4)
        print(f"This iteration{i+1},new Page Rank is :\n{r}")
        
    return r
    
    
# PageRank with Damping Factor
def PageRank_DF(A,r,b,S,iteration):
    N = len(S)
    
    for i in range(len(A)):
        A[i]=np.multiply(A[i],1/(np.sum(A[i])))
    
    AT = np.array(A).T
    M = []
    factor = (1-b)/N
    for i in range(len(AT)):
        if i+1 in S:
            M.append(np.add(np.multiply(AT[i],b),factor))
        else:
            M.append(np.multiply(AT[i],b))
        
    print(f"The Matrix is :\n{M}")
    print(f"This iteration 0,new Page Rank is :\n{r}")
    r = np.array(r)
    for i in range(iteration):
        r = np.around(np.dot(M,r.T),4)
        print(f"\nThis iteration {i+1},new Page Rank is :\n{r}")
        
    return r



In [14]:
# apriori
def count_item(data,k=1):
    N = len(data)
    nums={}
    if k==1:
        for i in range(N):
            for j in data[i]:
                if j not in nums:
                    nums[j]=1
                else:
                    nums[j]+=1

    elif k==2:
        for i in range(N):
            s= " ".join(data[i])
            if s not in nums:
                nums[s]=1
            else:
                nums[s]+=1
    print(f"the frequency is :{nums}")
    return nums

def transactions():
    N = int(input("please input your transaction number："))
    trans=[]
    for i in range(N):
        trans.append([i for i in input("please input the items:").split()])
        
    print(f"The transaction tables is following:\n{trans}")
    return trans

def one_itemset(nums,minsup):
    items = nums.keys()
    freq = []
    for i in items:
        if nums[i]>=minsup:
            freq.append(i)
    return freq

def two_itemset(dataset,freq,minsup):
    n = len(freq)
    candidates=[]
    for i in range(n):
        for j in range(i+1,n):
            candidates.append([freq[i],freq[j]])
            

    frequency={}
    for c in candidates:
        c.sort()
        for d in dataset:
            s = "".join(d)
            if c[0] in s and c[1] in s:
                items = " ".join(c)
                if items not in frequency:
                    frequency[items]=1
                else:
                    frequency[items]+=1
    print(f"\nall possible candidate frequency:\n{frequency}")
    freq_itemsets = one_itemset(frequency,minsup)
    print(f"\nall frequent 2-itemsets:\n{freq_itemsets}")
    itemsets=[]
    for i in freq_itemsets:
        itemsets.append(i.split())
    return itemsets
                
    
def three_itemset(dataset,freq,minsup):
    n = len(freq)
    freq.sort()
    candidates=[]
    for i in range(n):
        for j in range(i+1,n):
            temp = freq[i].copy()
            temp.extend(freq[j])
            itemset=list(set(temp))
            if len(itemset)>3:
                continue
            itemset.sort()
            if itemset not in candidates:
                candidates.append(itemset)
    
    frequency ={}
    for c in candidates:
        c.sort()
        for d in dataset:
            s = "".join(d)
            if c[0] in s and c[1] in s and c[2] in s:
                items = " ".join(c)
                if items not in frequency:
                    frequency[items]=1
                else:
                    frequency[items]+=1
    print(f"\n all possible candidate frequency:\n{frequency}")
    freq_itemsets = one_itemset(frequency,minsup)
    print(f"\nall frequent 3 itemsets:\n{freq_itemsets}")
    itemsets=[]
    for i in freq_itemsets:
        itemsets.append(i.split())
    return itemsets
    
    
def count_all(dataset):
    cp = dataset.copy()
    n = len(dataset)
    max_len=0
    for i in data:
        if len(i)>max_len:
            max_len=len(i)
    items=[]
    for i in cp:
        items.extend(i)
    items =list(set(items))
    frequency={}
    for i in items:
        frequency[i]=0
        for j in dataset:
            if i in j:
                frequency[i]+=1
    
    i2 = []  
    for i in range(len(items)):
        for j in range(i+1,len(items)):
            i2.append([items[i],items[j]])
            

    for c in i2:
        c.sort()
        its = " ".join(c)
        frequency[its]=0
        for d in dataset:
            s = "".join(d)
            if c[0] in s and c[1] in s:
                frequency[its]+=1
                
                
                
    i3=[]
    for i in range(len(i2)):
        for j in range(i+1,len(i2)):
            temp = i2[i].copy()
            temp.extend(i2[j])
            its =list(set(temp))
            if len(its)>3:
                continue
            its.sort()
            if its not in i3:
                i3.append(its)

    for c in i3:
        c.sort()
        its = " ".join(c)
        frequency[its]=0
        for d in dataset:
            s = "".join(d)
            if c[0] in s and c[1] in s and c[2] in s:
                frequency[its]+=1
                
    return frequency
            
            
def confidence(top,bottom,frequency):
    top_count = frequency[top]
    print(top,top_count)
    bottom_count = frequency[bottom]
    print(bottom,bottom_count)
    result = top_count/bottom_count
    return result

def fp(dataset,threshold):
    header = count_item(dataset)
    freq_list = one_itemset(header,min_sup)
    freq = all_frequency =count_all(dataset)
    result = []
    for i in range(len(dataset)):
        temp=[]
        for j in dataset[i]:
            if j in freq_list:
                temp.append(j)
                
        result.append(temp)
    
    print("frequent items:",result)

#     for i in range(len(result)):
#         for j in range(len(result[i])):
#             for k in range(j,len(result[i])):
#                 if freq[result[i][j]]>freq[result[i][k]]:
#                     result[i][j],result[i][k]=result[i][k],result[i][j]

    return result
    

In [15]:
# 数据统计相关计算模板
a = [0, 5, 14, 16, 17, 19, 19, 19, 22, 30, 50]
print(stat(a))
print(std(a))
print(variance(a))
outlier_det(a)
mode(a)

{'median': 19.0, 'mode': 19, 'MIN': 0, 'MAX': 50, 'Q1': 14, 'Q3': 22, 'IQR': 8}
std is: 12.379001147971818
12.379001147971818
var is: 153.2396694214876
153.2396694214876
最小边缘是: 2.0
最大边缘是: 34.0
outliers 有: [0, 50]


19

In [16]:
# 相似度计算模板

In [17]:
# 决策树计算模板

id3 c4.5
total = entropy([4,4])
split = 0.5*entropy([1,3])+0.5*entropy([3,1])
informationgain = total-split
print(informationgain)


# CART
p = gini([1,4])
print(p)
s=(4/5)*gini([2,2])+(1/5)*gini([1,0])
print(s)
w = (2/5)*gini([2,0])+(2/5)*gini([2,0])+(1/5)*gini([1,0])
print(w)
e = (5/11)*gini([1,4])+(6/11)*gini([5,1])
print(e)


g1 = p-s
print(g1)
g2 = p-w
print(g2)
g3 = p-e
print(g3)
tiredness = (3/8)*gini([1,2])+(5/8)*gini([2,3])
gain_1 = p-fever
gain_2 = p-tiredness
print(gain_1)
print(gain_2)

0.32
0.4
0.0
-0.08000000000000002
0.32


In [18]:
# Apriori计算模板
dataset=transactions()
nums=count_item(dataset)
min_sup=int(input("min support is :"))
freq = one_itemset(nums,min_sup)
print(f"\nall frequent 1 itemsets {freq}")

please input your transaction number：0
The transaction tables is following:
[]
the frequency is :{}
min support is :0

all frequent 1 itemsets []


In [19]:
freq2 = two_itemset(dataset,freq,min_sup)
# print("\n",freq2)
three_itemset(dataset,freq2,min_sup)
all_frequency =count_all(dataset)
all_freq_itemsets = one_itemset(all_frequency,min_sup)

print(f"\nall items' frequency:\n{all_frequency}")
print(f"\nall the frequency itemsets:\n{all_freq_itemsets}")
# confidence("b c m","b m",all_frequency)


all possible candidate frequency:
{}

all frequent 2-itemsets:
[]

 all possible candidate frequency:
{}

all frequent 3 itemsets:
[]


TypeError: object of type 'int' has no len()

In [None]:
# fp-tree
# dataset=transactions()
# dataset=[['f', 'a', 'c', 'd', 'g', 'i', 'm', 'p'], ['a', 'b', 'c', 'f', 'l', 'm', 'o'], ['b', 'f', 'h', 'j', 'o', 'w'], ['b', 'c', 'k', 's', 'p'], ['a', 'f', 'c', 'e', 'l', 'p', 'm', 'n']]
fp(dataset,min_sup)

In [None]:
# DBSCAN 判断点计算模板
data = [
    [0, 0],
    [1, 0],
    [2, 0],
    [3, 0],
    [1, 1],
    [2, 1],
    [4, 1],
    [6, 0],
    [5, 2],
    [6, 3],
    [7, 3],
    [6, 4],
    [7, 4]
]

data_plot(data)
# dm = dis_mat(data,data)
# dm
eps = np.sqrt(2)
minpoints=4
c,b,n = dbscan(data,eps,minpoints)
print("core points:",c)
print("boarder points:",b)
print("noise points:",n)


# k-means

center = [[2,10],[5,8],[1,2]]
data = [
    [2,5],
    [8,4],
    [7,5],
    [6,4],
    [4,9]
]

new_data=[
    [2,10],
    [2,5],
    [8,4],
    [5,8],
    [7,5],
    [6,4],
    [1,2],
    [4,9]
]
data_plot(new_data)
new_center = cxm_kmeans(data,center,3)
# print(new_center)
new_center = cxm_kmeans(new_data,new_center,3,first=False)
new_center = cxm_kmeans(new_data,new_center,3,first=False)
new_center = cxm_kmeans(new_data,new_center,3,first=False)

In [None]:
# item-item推荐系统计算模板


x = [1,0,3,0,0,5,0,0,5,0,4,0]
y = [2,4,0,1,2,0,3,0,4,3,5,0]
z = [1,0,3,0,3,0,0,2,0,0,4,0]

sims=[kevin_pearson(x,y),kevin_pearson(x,z)]
print(sims)
rates=[2,3]
item_item(sims,rates)


In [20]:
# pagerank
r = [.25,.25,.25,.25]

A = [
    [0,1,0,1],
    [0,0,1,0],
    [0,1,0,1],
    [1,0,1,0]
]


PageRank_PI(A,r,10)
# b=0.7
# S = [1,2,3,4]
# PageRank_DF(A,r,b,S,3)
# a = [0,0,1,0]


[[0.  0.  0.  0.5]
 [0.5 0.  0.5 0. ]
 [0.  1.  0.  0.5]
 [0.5 0.  0.5 0. ]]
This iteration 0,new Page Rank is :
[0.25, 0.25, 0.25, 0.25]
This iteration1,new Page Rank is :
[0.125 0.25  0.375 0.25 ]
This iteration2,new Page Rank is :
[0.125 0.25  0.375 0.25 ]
This iteration3,new Page Rank is :
[0.125 0.25  0.375 0.25 ]
This iteration4,new Page Rank is :
[0.125 0.25  0.375 0.25 ]
This iteration5,new Page Rank is :
[0.125 0.25  0.375 0.25 ]
This iteration6,new Page Rank is :
[0.125 0.25  0.375 0.25 ]
This iteration7,new Page Rank is :
[0.125 0.25  0.375 0.25 ]
This iteration8,new Page Rank is :
[0.125 0.25  0.375 0.25 ]
This iteration9,new Page Rank is :
[0.125 0.25  0.375 0.25 ]
This iteration10,new Page Rank is :
[0.125 0.25  0.375 0.25 ]


array([0.125, 0.25 , 0.375, 0.25 ])

In [None]:
x= [0,5,7,3,7,2,3,7,1,1,2,5,4,5,10]
y= [2,4,9,4,9,1,1,6,0,2,3,5,6,5,12]
pearson(x,y)
# correlation(x,y)