In [236]:
import pandas as pd
import math
import random

In [4]:
# Load the receiving-stats CSV into a DataFrame and preview the first rows.
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs where this exact file exists.
csv_path = "/Users/revan/Downloads/new_nfldata_2.csv"
nfldf = pd.read_csv(csv_path)
nfldf.head()

Unnamed: 0,team,player_id,player_name,position,season,depth,targets,receptions,receiving_yards,receiving_air_yards,yards_after_catch,reception_td,reception_fumble_lost,PositionEncoded
0,TEN,00-0035676,A.J. Brown,WR,2019,2.0,84.0,52.0,1051.0,1107.0,465.0,8.0,0.0,1
1,TEN,00-0035676,A.J. Brown,WR,2020,1.0,106.0,70.0,1075.0,1150.0,432.0,12.0,1.0,1
2,TEN,00-0035676,A.J. Brown,WR,2021,1.0,105.0,63.0,869.0,1218.0,235.0,5.0,0.0,1
3,PHI,00-0035676,A.J. Brown,WR,2022,1.0,145.0,88.0,1496.0,1750.0,552.0,12.0,2.0,1
4,PHI,00-0035676,A.J. Brown,WR,2023,1.0,158.0,106.0,1456.0,1853.0,473.0,8.0,2.0,1


In [212]:
# Derived ratio and interaction features, computed column-wise with pandas
# arithmetic instead of element-by-element Python loops.
# The "+ 1" in each denominator avoids division by zero for rows with no
# targets/receptions (it also shrinks every ratio slightly).
# NOTE: the min-max scaling cell further down overwrites the source columns in
# place, so re-running this cell after it would derive features from scaled
# values rather than the raw stats.
nfldf['yards_per_target'] = nfldf['receiving_yards'] / (nfldf['targets'] + 1)
nfldf['yards_per_reception'] = nfldf['receiving_yards'] / (nfldf['receptions'] + 1)
nfldf['td_per_reception'] = nfldf['reception_td'] / (nfldf['receptions'] + 1)
# Pairwise product (interaction) terms.
nfldf['targets_receptions'] = nfldf['targets'] * nfldf['receptions']
nfldf['yards_touchdowns'] = nfldf['receiving_yards'] * nfldf['reception_td']

In [214]:
# Columns fed to the model, in the order they appear in the feature matrix.
feature_cols = [
    # stats read straight from the CSV
    'targets',
    'receptions',
    'receiving_yards',
    'receiving_air_yards',
    'yards_after_catch',
    'reception_td',
    'reception_fumble_lost',
    # derived ratio features
    'yards_per_target',
    'yards_per_reception',
    'td_per_reception',
    # derived product features
    'targets_receptions',
    'yards_touchdowns',
]

In [216]:
# Min-max scale every feature column of nfldf into [0, 1], in place.
# Series.min()/max() skip NaN, unlike the builtin min()/max(), whose result
# depends on element order when NaN is present.
for col in feature_cols:
    min_val, max_val = nfldf[col].min(), nfldf[col].max()
    shifted = nfldf[col] - min_val
    if max_val == min_val:
        # Constant column: the range is 0, so dividing would fail. Keep the
        # shifted values (all zeros, NaN preserved) instead.
        nfldf[col] = shifted
    else:
        nfldf[col] = shifted / (max_val - min_val)

In [218]:
# Split the frame into the model inputs: `y` holds the encoded position label
# of each row, `X` the matching row of feature values (one column per entry of
# feature_cols, in that order).
y = nfldf['PositionEncoded'].to_numpy()
X = nfldf[feature_cols].to_numpy()

In [220]:
# Z-score standardise each column of X: subtract the column mean, divide by
# the column standard deviation.
# NOTE: X must still be the NumPy array built from nfldf here; the bias-column
# cell below turns X into a list of lists, after which this cell cannot be
# re-run without rebuilding X first.
col_mean = X.mean(axis=0)
col_std = X.std(axis=0)
# A constant column has std 0; dividing by it would turn the whole column into
# NaN. Dividing by 1 instead leaves such a column at exactly 0.
col_std[col_std == 0] = 1.0
X = (X - col_mean) / col_std

In [222]:
# Prepend a constant 1 to every row of X so the first coefficient acts as the
# intercept. X becomes a plain list of lists.
# NOTE: this rebinds X, so running the cell twice prepends a second 1.
ones = [[1] for _row in X]
X = [bias + list(features) for bias, features in zip(ones, X)]

In [268]:
def sigmoid(z):
    """Return the logistic function 1 / (1 + e^-z) of a scalar ``z``.

    The two algebraically equivalent forms are chosen by sign so that
    ``math.exp`` is only ever called with a non-positive argument and
    therefore cannot overflow.
    """
    if z >= 0:
        denominator = 1 + math.exp(-z)
        return 1 / denominator

    # z < 0: rewrite as e^z / (1 + e^z) to keep the exponent negative.
    numerator = math.exp(z)
    return numerator / (1 + numerator)

In [310]:
def logistic_regression(X, y, lr=0.01, epochs=10000, seed=None):
    """Fit logistic-regression coefficients with per-sample SGD.

    Parameters
    ----------
    X : sequence of sequences of numbers
        One row per training example. No intercept term is added here, so a
        constant column must already be present if an intercept is wanted.
    y : sequence of numbers
        One label per row of ``X``; compared directly against the sigmoid
        output (presumably 0/1 labels - confirm against the caller).
    lr : float
        Step size applied to every single-example gradient update.
    epochs : int
        Number of full passes over the training pairs.
    seed : int or None
        When given, shuffling uses a private ``random.Random(seed)`` so the
        fit is reproducible. ``None`` (default) uses the module-level
        ``random`` state, exactly as before.

    Returns
    -------
    list
        One coefficient per column of ``X``.

    Raises
    ------
    ValueError
        If there is no (row, label) pair to train on.
    """
    # Rows and labels are paired positionally; any surplus on either side is
    # ignored (same as zipping the two sequences).
    n_samples = min(len(X), len(y))
    if n_samples == 0:
        raise ValueError("logistic_regression needs at least one training example")

    n_features = len(X[0])
    beta = [0.0] * n_features  # Initialize coefficients

    rng = random if seed is None else random.Random(seed)

    # Shuffle an index list rather than re-zipping and rebinding X and y each
    # epoch: the visiting order is the same, but the inputs stay untouched.
    order = list(range(n_samples))

    for _ in range(epochs):
        rng.shuffle(order)

        # SGD: one coefficient update per training example.
        for i in order:
            row = X[i]
            z = sum(a * b for a, b in zip(row, beta))
            step = lr * (sigmoid(z) - y[i])  # lr * (prediction - label)
            for j in range(n_features):
                beta[j] -= step * row[j]

    return beta

In [312]:
# logistic_regression shuffles the training data with the `random` module on
# every epoch, so seed it first: otherwise each run yields slightly different
# coefficients and the reported accuracy cannot be reproduced.
random.seed(42)
coefficients = logistic_regression(X, y)

In [304]:
def predict(X, coefficients):
    """Return a 0/1 label for every row of ``X``.

    Each row is dotted with ``coefficients``, passed through ``sigmoid``, and
    labelled 1 when the resulting probability is at least 0.5, else 0.
    """
    def _label(row):
        # Linear score -> probability -> thresholded class.
        score = sum(a * b for a, b in zip(row, coefficients))
        return 1 if sigmoid(score) >= 0.5 else 0

    return [_label(row) for row in X]

In [306]:
# Predict a label for every row of X. These are the same rows the coefficients
# were fitted on, so anything computed from y_pred is a training-set figure.
y_pred = predict(X=X, coefficients=coefficients)

In [308]:
# Accuracy = share of rows whose predicted label equals the actual label.
matched_pairs = [pair for pair in zip(y, y_pred) if pair[0] == pair[1]]
accuracy = len(matched_pairs) / len(y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.941031941031941
