In [1]:
from random import seed
from random import randrange
from csv import reader
from math import exp
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import model_selection
from numpy import loadtxt, savetxt
from sklearn.cluster import KMeans
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import mean_squared_error

In [2]:
# Accumulators filled by the evaluation loop further down:
# one mean accuracy and one mean squared error per repetition.
final = []
finalmean = []

In [3]:
# Logistic Regression on MOTOR SKILLS

# Load a CSV file
def load_csv(filename):
	"""Read a CSV file into a list of rows, skipping empty lines.

	Args:
		filename: Path of the CSV file to read.

	Returns:
		A list of rows; each row is the list of string fields produced by
		csv.reader. Rows that come back empty (blank lines) are omitted.
	"""
	# newline='' hands line endings to the csv module untouched, as its
	# documentation requires; otherwise '\r\n' inside quoted fields is
	# silently rewritten to '\n' by universal-newline translation.
	with open(filename, 'r', newline='') as file:
		return [row for row in reader(file) if row]

# Convert string column to float
def str_column_to_float(dataset, column):
	"""Convert one column of every row from text to float, in place.

	Args:
		dataset: List of rows (lists) whose entry at `column` is a string.
		column: Index of the column to convert.
	"""
	for record in dataset:
		raw_text = record[column]
		# Surrounding whitespace is stripped before parsing.
		record[column] = float(raw_text.strip())

# Find the min and max values for each column
def dataset_minmax(dataset):
	"""Compute the [min, max] pair of every column.

	The number of columns is taken from the first row.

	Args:
		dataset: Non-empty list of rows holding comparable values.

	Returns:
		A list with one [minimum, maximum] pair per column, in column order.
	"""
	n_columns = len(dataset[0])
	# Lazily materialise one column at a time.
	columns = ([record[idx] for record in dataset] for idx in range(n_columns))
	return [[min(values), max(values)] for values in columns]

# Rescale dataset columns to the range 0-1
def normalize_dataset(dataset, minmax):
	"""Rescale every value of the dataset to the range 0-1, in place.

	Args:
		dataset: List of rows (lists) of numeric values; modified in place.
		minmax: One [minimum, maximum] pair per column, as produced by
			dataset_minmax.

	A column whose minimum equals its maximum carries no information and
	would otherwise cause a division by zero; its values are set to 0.0.
	"""
	for row in dataset:
		for i in range(len(row)):
			low, high = minmax[i][0], minmax[i][1]
			span = high - low
			if span == 0:
				# Constant column: avoid ZeroDivisionError.
				row[i] = 0.0
			else:
				row[i] = (row[i] - low) / span

# Split a dataset into k folds
def cross_validation_split(dataset, n_folds):
	"""Randomly partition the dataset into n_folds equally sized folds.

	Rows are drawn without replacement from a shallow copy, so the input
	list itself is not modified. Each fold holds int(len(dataset) / n_folds)
	rows; any rows left over after filling all folds are not assigned.

	Args:
		dataset: List of rows.
		n_folds: Number of folds to create.

	Returns:
		A list of n_folds lists of rows.
	"""
	pool = list(dataset)
	rows_per_fold = int(len(dataset) / n_folds)
	partitions = []
	for _ in range(n_folds):
		# Draw rows_per_fold random rows out of the remaining pool.
		chosen = [pool.pop(randrange(len(pool))) for _ in range(rows_per_fold)]
		partitions.append(chosen)
	return partitions

# Calculate accuracy percentage
def accuracy_metric(actual, predicted):
	"""Return the percentage of positions where prediction equals truth.

	Args:
		actual: Sequence of true labels.
		predicted: Sequence of predicted labels, indexed like `actual`.

	Returns:
		Accuracy as a float percentage between 0.0 and 100.0.
	"""
	total = len(actual)
	hits = sum(1 for idx in range(total) if actual[idx] == predicted[idx])
	return hits / float(total) * 100.0

# Evaluate an algorithm using a cross validation split
def evaluate_algorithm(dataset, algorithm, n_folds, *args):
	"""Score an algorithm with k-fold cross validation.

	Each fold is used once as the test set while the remaining folds are
	concatenated into the training set. Test rows are copies whose last
	element (the label) is replaced by None before being handed to the
	algorithm.

	Args:
		dataset: List of rows; the last element of each row is the label.
		algorithm: Callable invoked as algorithm(train_set, test_set, *args)
			that returns one prediction per test row.
		n_folds: Number of cross-validation folds.
		*args: Extra positional arguments forwarded to `algorithm`.

	Returns:
		A tuple (scores, final_pred, final_actu): the accuracy percentage of
		every fold, all predictions concatenated in fold order, and the
		matching true labels.
	"""
	folds = cross_validation_split(dataset, n_folds)
	scores = []
	final_pred = []
	final_actu = []
	for held_out in folds:
		# Training data: every fold except the held-out one, flattened.
		other_folds = list(folds)
		other_folds.remove(held_out)
		train_set = sum(other_folds, [])
		# Test data: copies of the held-out rows with the label hidden.
		test_set = [list(row) for row in held_out]
		for masked_row in test_set:
			masked_row[-1] = None
		predicted = algorithm(train_set, test_set, *args)
		actual = [row[-1] for row in held_out]
		final_pred.extend(predicted)
		final_actu.extend(actual)
		scores.append(accuracy_metric(actual, predicted))
	return scores, final_pred, final_actu

# Make a prediction with coefficients
def predict(row, coefficients):
	"""Return the logistic-model output for a single row.

	Args:
		row: Feature values followed by one trailing element (the label
			slot), which is ignored here.
		coefficients: coefficients[0] is the intercept; coefficients[i + 1]
			is the weight applied to row[i].

	Returns:
		The sigmoid of the linear combination, a float between 0.0 and 1.0.
	"""
	yhat = coefficients[0]
	for i in range(len(row)-1):
		yhat += coefficients[i + 1] * row[i]
	try:
		return 1.0 / (1.0 + exp(-yhat))
	except OverflowError:
		# exp(-yhat) exceeds the float range for strongly negative yhat;
		# the sigmoid is indistinguishable from 0 there.
		return 0.0

# Estimate logistic regression coefficients using stochastic gradient descent
def coefficients_sgd(train, l_rate, n_epoch):
	"""Fit the model coefficients with stochastic gradient descent.

	Args:
		train: List of training rows; the last element of each row is the
			label, the preceding elements are the features.
		l_rate: Learning rate applied to every update.
		n_epoch: Number of full passes over the training rows.

	Returns:
		A list of coefficients: the intercept first, then one weight per
		feature. All coefficients start at 0.0.
	"""
	weights = [0.0] * len(train[0])
	for _ in range(n_epoch):
		for sample in train:
			output = predict(sample, weights)
			residual = sample[-1] - output
			# Update factor shared by the intercept and every weight.
			step = l_rate * residual * output * (1.0 - output)
			weights[0] = weights[0] + step
			for j in range(len(sample)-1):
				weights[j + 1] = weights[j + 1] + step * sample[j]
	return weights

# Logistic Regression Algorithm With Stochastic Gradient Descent
def logistic_regression(train, test, l_rate, n_epoch):
	"""Train on `train` with SGD and classify every row of `test`.

	Args:
		train: Training rows (features followed by the label).
		test: Rows to classify; their last element is ignored.
		l_rate: Learning rate passed to coefficients_sgd.
		n_epoch: Number of epochs passed to coefficients_sgd.

	Returns:
		A list with one rounded prediction per test row.
	"""
	fitted = coefficients_sgd(train, l_rate, n_epoch)
	# Round the model output to the nearest class label.
	return [round(predict(sample, fitted)) for sample in test]

# Evaluate the logistic regression algorithm on the dataset in Logdata.csv
# NOTE(review): the seed is disabled, so the fold assignment and every score
# below change from run to run; enable seed(1) for reproducible results.
#seed(1)
# load and prepare data
# NOTE(review): absolute, machine-specific path; the cell only runs where this
# file exists. Consider a path relative to a configurable data directory.
filename = r'C:\Users\rishi\Desktop\Logdata.csv'
dataset = load_csv(filename)
for i in range(len(dataset[0])):
	str_column_to_float(dataset, i)
# normalize
minmax = dataset_minmax(dataset)
normalize_dataset(dataset, minmax)
# Hyperparameters are the same for every repetition, so set them once.
n_folds = 9
l_rate = 0.6
n_epoch = 300
# Start from empty accumulators so that re-running this cell does not mix
# results of earlier executions into the averages printed at the end.
final = []
finalmean = []
# evaluate algorithm: 20 repetitions, each with a fresh random fold split
for checknum in range(1,21):
	scores, final_predicted, final_actual = evaluate_algorithm(dataset, logistic_regression, n_folds, l_rate, n_epoch)
	print('Scores: %s' % scores)
	mean_acc = sum(scores)/float(len(scores))
	final.append(mean_acc)
	# Compute the error once and reuse it for both bookkeeping and output.
	mse = mean_squared_error(final_actual, final_predicted)
	finalmean.append(mse)
	print('Mean Accuracy: %.3f%%' % (mean_acc))
	print(confusion_matrix(final_actual,final_predicted).ravel())
	print(classification_report(final_actual,final_predicted))
	print(mse)
# Averages over all repetitions: mean accuracy, then mean squared error.
print(np.mean(final))
print(np.mean(finalmean))

Scores: [93.75, 93.75, 100.0, 100.0, 93.75, 100.0, 81.25, 100.0, 93.75]
Mean Accuracy: 95.139%
[106   2   5  31]
              precision    recall  f1-score   support

         0.0       0.95      0.98      0.97       108
         1.0       0.94      0.86      0.90        36

    accuracy                           0.95       144
   macro avg       0.95      0.92      0.93       144
weighted avg       0.95      0.95      0.95       144

0.04861111111111111
Scores: [100.0, 93.75, 100.0, 93.75, 100.0, 100.0, 87.5, 93.75, 100.0]
Mean Accuracy: 96.528%
[109   0   5  30]
              precision    recall  f1-score   support

         0.0       0.96      1.00      0.98       109
         1.0       1.00      0.86      0.92        35

    accuracy                           0.97       144
   macro avg       0.98      0.93      0.95       144
weighted avg       0.97      0.97      0.96       144

0.034722222222222224
Scores: [93.75, 100.0, 81.25, 93.75, 100.0, 100.0, 100.0, 100.0, 100.0]
Mean Acc

Scores: [100.0, 100.0, 100.0, 100.0, 93.75, 93.75, 100.0, 93.75, 93.75]
Mean Accuracy: 97.222%
[109   0   4  31]
              precision    recall  f1-score   support

         0.0       0.96      1.00      0.98       109
         1.0       1.00      0.89      0.94        35

    accuracy                           0.97       144
   macro avg       0.98      0.94      0.96       144
weighted avg       0.97      0.97      0.97       144

0.027777777777777776
Scores: [87.5, 100.0, 93.75, 87.5, 93.75, 100.0, 100.0, 100.0, 100.0]
Mean Accuracy: 95.833%
[108   0   6  30]
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.97       108
         1.0       1.00      0.83      0.91        36

    accuracy                           0.96       144
   macro avg       0.97      0.92      0.94       144
weighted avg       0.96      0.96      0.96       144

0.041666666666666664
96.28472222222221
0.03715277777777777


In [4]:
# from sklearn.linear_model import LogisticRegression
# from numpy import loadtxt, savetxt

In [5]:
# y = np.loadtxt("kmeanpredict.csv", delimiter=",")
# dataset = pd.read_csv(r'C:\Users\rishi\Desktop\FinalDataset.csv')
# data = dataset.copy()
# data = pd.DataFrame(data)
# data.columns = ['Age', 'Brush', 'Ball', 'Watch', 'Mobile', 'Duster']
# X = data[['Age', 'Brush', 'Ball', 'Watch', 'Mobile', 'Duster']].to_numpy()
# print(y)
# LRG = LogisticRegression(max_iter = 300, random_state = 0,solver = 'liblinear',multi_class = 'auto').fit(X, y)
# print(LRG.score(X, y))
# LRG.densify()