## 逻辑回归

w = a<sub>0</sub>+a<sub>1</sub>x<sub>1</sub>+a<sub>2</sub>x<sub>2</sub>...a<sub>n</sub>x<sub>n</sub>  
g(w) = 1/(1+e<sup>-w</sup>)  
->L(w) = $\prod_{i=1}^{n}$g(w<sub>i</sub>)<sup>v<sub>i</sub></sup>(1 - g(w<sub>i</sub>))<sup>1-v<sub>i</sub></sup> , v<sub>i</sub> = 0 or 1  
->J(w) = (-1/n)$\sum_{i=1}^{n}$v<sub>i</sub>log(w<sub>i</sub>) + (1 - v<sub>i</sub>)log(1 - g(w<sub>i</sub>))  
->$\partial$J/$\partial$w = (1/n)$\sum_{i=1}^{n}$(w<sub>i</sub> - g(h<sub>a</sub>(x<sub>i</sub>)))x<sub>i</sub><sup>j</sup>  
->a = a - ($\alpha$/n)  (X<sup>T</sup>(w<sub>i</sub> - h<sub>a</sub>(x<sub>i</sub>)) 

In [163]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import csv

filepath = 'C:/Users/32673/Desktop/工作室作业/小组/week1/point_data.csv'

class Logistic:
    """逻辑回归"""
    def __init__(self, alpha = 0.001, times = 10000, group_num = 5):
        self.alpha = alpha # 学习率
        self.times = times # 迭代次数
        self.group_num = group_num # 组数
         # 路径
        self.df = None
        self.df_test = None
        self.df_train = None
         # 参数(1*4)
        self.a_group = []
        self.a_final = None

    def read_data(self):
        """读取数据"""
        self.df = pd.read_csv(filepath, header=0)
        # 分割训练集和测试集
        self.df_test = self.df.sample(n=10, replace=False)
        self.df_train = self.df.drop(self.df_test.index)

    def sigmoid(self, y):
        return 1 / (1 + np.exp(-y))

    def init_data(self):
        """return X(数据矩阵4*n), w(结果矩阵1*n)"""
        # 提取特征和标签
        df_train_group = self.df_train.sample(n = 300, replace=False)
        X = np.array([df_train_group['KILL'], df_train_group['DEATH'], df_train_group['ASSIST']]).T
        w = np.array(df_train_group['GRADE'])
        # 特征标准化
        X_mean = np.mean(X, axis=0)
        X_std = np.std(X, axis=0)
        X = (X - X_mean) / X_std  # 训练数据经处理！=0
        # 插入偏置列
        X = np.hstack([np.ones((X.shape[0], 1)), X])
        return X, w

    def train(self, X, w):
        """训练"""
        # 初始化参数
        n = X.shape[1]
        a = np.zeros(n)
        # 训练模型
        for _ in range(self.times):
            lin = X @ a
            p = self.sigmoid(lin)
            error = w - p
            
            # 计算梯度并归一化
            da = (1 / X.shape[0]) * (X.T @ error)
            a += da * self.alpha  # 更新参数
        self.a_group.append(a)

    def groups(self):
        """分组"""
        for _ in range(self.group_num):
            # forest
            X, w = self.init_data()
            self.train(X, w)
        for r in self.a_group:
            print("r = ",r)
        self.a_final = sum(self.a_group)/len(self.a_group)
        print("Trained parameters:", self.a_final)

    def predict(self, test):
        """预测"""
        X_test = np.array([test])
        #X_test_mean = np.mean(X_test, axis=0)
        #X_test_std = np.std(X_test, axis=0)
        #X_test = (X_test - X_test_mean) / X_test_std # 不标准化测试数据
        
          
        X_test = np.hstack([np.ones((X_test.shape[0], 1)), X_test])  # 插入偏置列
        w_test = self.sigmoid(X_test @ self.a_final)
        print("Prediction = ", w_test)
        if w_test > 0.5 :
            result = "M!V!P!"
        else:
            result = "Lay Win Dog"
        return result


In [165]:
class Train_Data:
    """处理数据"""
    def __init__(self, col = 3, row = 600):
        self.col = col
        self.row = row
        self.X = None
        self.w = []
        self.df = None
        self.df_test = None
        
    def random_data(self):
        """数据哪来的的你别管"""
        self.X = np.random.randint(low=1, high=30, size=(self.row, self.col), dtype='int')
        #print("K  D  A")
        for r in self.X:
            #print(" ".join(map(str,r)),end = '')
            #mvp = int(input("MVP=1  or  LWD=0    "))
            #if mvp > 0:
            #    mvp = 1
            #else:
            #   mvp = 0
            
            if r[0] - r[1] > 10 or r[2] - r[1] > 20:
                mvp = 1
            else:
                mvp = 0
            
            self.w.append(mvp)
        return self.X, self.w

    def save_data(self, X, w):
        """保存数据"""
        W_save = np.array([w]).T
        X_save = np.append(X, W_save, axis=1)
        df = pd.DataFrame(X_save, columns = ['KILL','DEATH','ASSIST','GRADE'])
        df.to_csv(filepath, index=False, sep=',')
        print("writting succeed")

    def load_data(self):
        """加载数据"""
        self.df = pd.read_csv(filepath, header=0)
        self.df_test = self.df.sample(n=10, replace = False)
        print("load succeed")
        return self.df, self.df_test

    def init_data(self, df):
        """return X（二维自变量）, w（一维目的列表）"""
        #df_train = df.sample(n=30, replace = True)#随机取30组
        df_train = df
        self.X = np.array([df_train.KILL, df_train.DEATH, df_train.ASSIST]).T
        self.X = np.hstack([np.ones((self.X.shape[0],1)),self.X])#插入偏置列
        self.w = df_train.GRADE
        self.w = np.array(self.w)
        print("init succed")
        return self.X, self.w



In [167]:
#main
lo = Logistic()
lo.read_data()
lo.groups()


r =  [-1.33077865  0.89513454 -0.92423877  0.0977815 ]
r =  [-1.22783442  0.97470423 -0.9977501   0.14857659]
r =  [-1.38528957  0.81535357 -0.96071157  0.2073269 ]
r =  [-1.22610747  0.91173925 -1.11868304  0.25323077]
r =  [-1.3126267   0.93184915 -1.04455814  0.10209213]
Trained parameters: [-1.29652736  0.90575615 -1.00918832  0.16180158]


In [169]:
# 测试数据
X_test = [0, 20, 0]
print(lo.predict(X_test))
X_test = [12, 0, 10]
print(lo.predict(X_test))

Prediction =  [4.69058406e-10]
Lay Win Dog
Prediction =  [0.9999862]
M!V!P!
