In [6]:
import math

In [7]:
class LinearRegression:
    def __init__(self, learning_rate=0.01, n_iterations=1000):
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        n_samples = len(X)
        n_features = len(X[0])
        self.weights = [0] * n_features
        self.bias = 0

        for _ in range(self.n_iterations):
            y_predicted = self.predict(X)

            dw = [0] * n_features
            db = 0
            for i in range(n_samples):
                error = y_predicted[i] - y[i]
                for j in range(n_features):
                    dw[j] += (1 / n_samples) * error * X[i][j]
                db += (1 / n_samples) * error

            for j in range(n_features):
                self.weights[j] -= self.learning_rate * dw[j]
            self.bias -= self.learning_rate * db

    def predict(self, X):
        return [sum(x[j] * self.weights[j] for j in range(len(self.weights))) + self.bias for x in X]

In [8]:
def mean_squared_error(y_true, y_pred):
    return sum((y_true[i] - y_pred[i])**2 for i in range(len(y_true))) / len(y_true)

In [9]:
def read_csv(filename):
    with open(filename, 'r') as f:
        lines = f.readlines()
    
    # Assuming the first line is the header
    header = lines[0].strip().split(',')
    data = [line.strip().split(',') for line in lines[1:]]
    
    # Convert string values to float
    data = [[float(value) for value in row] for row in data]
    
    return header, data

In [11]:
def preprocess_data(data):
    return [[float(val) for val in row] for row in data]

def split_features_target(data):
    X = [[row[0], row[1]] for row in data]  # Age and Height
    y = [row[2] for row in data]   # Weight
    return X, y

In [12]:
def normalize_data(X):
    n_features = len(X[0])
    min_values = [min(X[i][j] for i in range(len(X))) for j in range(n_features)]
    max_values = [max(X[i][j] for i in range(len(X))) for j in range(n_features)]
    
    normalized_X = []
    for row in X:
        normalized_row = [(row[j] - min_values[j]) / (max_values[j] - min_values[j]) if max_values[j] != min_values[j] else 0 for j in range(n_features)]
        normalized_X.append(normalized_row)
    
    return normalized_X, min_values, max_values

In [13]:
def train_test_split(X, y, test_size=0.2):
    combined = list(zip(X, y))
    random.shuffle(combined)
    X[:], y[:] = zip(*combined)
    split_index = int(len(X) * (1 - test_size))
    return X[:split_index], X[split_index:], y[:split_index], y[split_index:]


In [16]:
import csv
import random
from sklearn.preprocessing import LabelEncoder

# Linear Regression model class
class LinearRegression:
    def __init__(self, learning_rate=0.01, n_iterations=1000):
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        n_samples = len(X)
        n_features = len(X[0])
        self.weights = [0] * n_features
        self.bias = 0

        for _ in range(self.n_iterations):
            y_predicted = self.predict(X)

            dw = [0] * n_features
            db = 0
            for i in range(n_samples):
                error = y_predicted[i] - y[i]
                for j in range(n_features):
                    dw[j] += (1 / n_samples) * error * X[i][j]
                db += (1 / n_samples) * error

            for j in range(n_features):
                self.weights[j] -= self.learning_rate * dw[j]
            self.bias -= self.learning_rate * db

    def predict(self, X):
        return [sum(x[j] * self.weights[j] for j in range(len(self.weights))) + self.bias for x in X]

# Function to calculate mean squared error
def mean_squared_error(y_true, y_pred):
    return sum((y_true[i] - y_pred[i])**2 for i in range(len(y_true))) / len(y_true)

# Function to read CSV data
def read_csv(filename):
    with open(filename, 'r') as f:
        reader = csv.reader(f)
        header = next(reader)  # Skip the header row
        raw_data = list(reader)
    
    return header, raw_data

# Preprocessing function to encode Gender and convert numeric columns
def preprocess_data(raw_data):
    gender_encoder = LabelEncoder()
    genders = [row[0] for row in raw_data]
    gender_encoded = gender_encoder.fit_transform(genders)
    
    data = [[gender_encoded[i], float(row[1]), float(row[2])] for i, row in enumerate(raw_data)]
    return data

# Function to split features and target
def split_features_target(data):
    X = [[row[0], row[1]] for row in data]  # Gender and Height as features
    y = [row[2] for row in data]  # Weight as target
    return X, y

# Function to normalize features
def normalize_data(X):
    n_features = len(X[0])
    min_values = [min(X[i][j] for i in range(len(X))) for j in range(n_features)]
    max_values = [max(X[i][j] for i in range(len(X))) for j in range(n_features)]
    
    normalized_X = []
    for row in X:
        normalized_row = [(row[j] - min_values[j]) / (max_values[j] - min_values[j]) if max_values[j] != min_values[j] else 0 for j in range(n_features)]
        normalized_X.append(normalized_row)
    
    return normalized_X, min_values, max_values

# Function to split data into training and testing sets
def train_test_split(X, y, test_size=0.2):
    combined = list(zip(X, y))
    random.shuffle(combined)
    X[:], y[:] = zip(*combined)
    split_index = int(len(X) * (1 - test_size))
    return X[:split_index], X[split_index:], y[:split_index], y[split_index:]

if __name__ == "__main__":
    # Read and preprocess data
    filename = "/kaggle/input/weight-height/weight-height.csv"  # Make sure this file is in the same directory as your script
    header, raw_data = read_csv(filename)
    data = preprocess_data(raw_data)
    
    # Split features and target
    X, y = split_features_target(data)
    
    # Normalize features
    X_normalized, min_values, max_values = normalize_data(X)
    
    # Split into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X_normalized, y)
    
    # Create and train the model
    model = LinearRegression(learning_rate=0.01, n_iterations=10000)
    model.fit(X_train, y_train)
    
    # Make predictions on test set
    predictions = model.predict(X_test)
    
    print("Weights:", model.weights)
    print("Bias:", model.bias)
    print("MSE on test set:", mean_squared_error(y_test, predictions))

    # Predict weight for a new sample
    new_sample = [0, 170]  # Gender: 0 (Male), Height: 170 cm
    normalized_sample = [(new_sample[j] - min_values[j]) / (max_values[j] - min_values[j]) if max_values[j] != min_values[j] else 0 for j in range(len(new_sample))]
    new_prediction = model.predict([normalized_sample])[0]
    print(f"Predicted weight for new sample (Gender: Male, Height: 170 cm): {new_prediction:.2f} kg")


Weights: [26.40679516244484, 114.82997440685335]
Bias: 92.1083623044603
MSE on test set: 115.70284558366869
Predicted weight for new sample (Gender: Male, Height: 170 cm): 629.39 kg


In [1]:
import csv
import random
from sklearn.preprocessing import LabelEncoder

# Linear Regression model class
class LinearRegression:
    def __init__(self, learning_rate=0.01, n_iterations=1000):
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        n_samples = len(X)
        n_features = len(X[0])
        self.weights = [0] * n_features
        self.bias = 0

        for _ in range(self.n_iterations):
            y_predicted = self.predict(X)

            dw = [0] * n_features
            db = 0
            for i in range(n_samples):
                error = y_predicted[i] - y[i]
                for j in range(n_features):
                    dw[j] += (1 / n_samples) * error * X[i][j]
                db += (1 / n_samples) * error

            for j in range(n_features):
                self.weights[j] -= self.learning_rate * dw[j]
            self.bias -= self.learning_rate * db

    def predict(self, X):
        return [sum(x[j] * self.weights[j] for j in range(len(self.weights))) + self.bias for x in X]

# Function to calculate mean squared error
def mean_squared_error(y_true, y_pred):
    return sum((y_true[i] - y_pred[i])**2 for i in range(len(y_true))) / len(y_true)

# Function to read CSV data
def read_csv(filename):
    with open(filename, 'r') as f:
        reader = csv.reader(f)
        header = next(reader)  # Skip the header row
        raw_data = list(reader)
    
    return header, raw_data

# Preprocessing function to encode Gender and convert numeric columns
def preprocess_data(raw_data):
    gender_encoder = LabelEncoder()
    genders = [row[0] for row in raw_data]
    gender_encoded = gender_encoder.fit_transform(genders)
    
    data = [[gender_encoded[i], float(row[1]), float(row[2])] for i, row in enumerate(raw_data)]
    return data, gender_encoder

# Function to split features and target
def split_features_target(data):
    X = [[row[0], row[1]] for row in data]  # Gender and Height as features
    y = [row[2] for row in data]  # Weight as target
    return X, y

# Function to normalize features
def normalize_data(X):
    n_features = len(X[0])
    min_values = [min(X[i][j] for i in range(len(X))) for j in range(n_features)]
    max_values = [max(X[i][j] for i in range(len(X))) for j in range(n_features)]
    
    normalized_X = []
    for row in X:
        normalized_row = [(row[j] - min_values[j]) / (max_values[j] - min_values[j]) if max_values[j] != min_values[j] else 0 for j in range(n_features)]
        normalized_X.append(normalized_row)
    
    return normalized_X, min_values, max_values

# Function to split data into training and testing sets
def train_test_split(X, y, test_size=0.2):
    combined = list(zip(X, y))
    random.shuffle(combined)
    X[:], y[:] = zip(*combined)
    split_index = int(len(X) * (1 - test_size))
    return X[:split_index], X[split_index:], y[:split_index], y[split_index:]

if __name__ == "__main__":
    # Read and preprocess data
    filename = "/kaggle/input/weight-height/weight-height.csv"  # Make sure this file is in the same directory as your script
    header, raw_data = read_csv(filename)
    data, gender_encoder = preprocess_data(raw_data)
    
    # Split features and target
    X, y = split_features_target(data)
    
    # Normalize features
    X_normalized, min_values, max_values = normalize_data(X)
    
    # Split into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X_normalized, y)
    
    # Create and train the model
    model = LinearRegression(learning_rate=0.01, n_iterations=10000)
    model.fit(X_train, y_train)
    
    # Make predictions on test set
    predictions = model.predict(X_test)
    
    print("Weights:", model.weights)
    print("Bias:", model.bias)
    print("MSE on test set:", mean_squared_error(y_test, predictions))

    # Predict weight for a new sample
    new_sample = ['Male', 170]  # Gender: Male, Height: 170 cm
    gender_encoded_sample = gender_encoder.transform([new_sample[0]])[0]
    height_sample = new_sample[1]
    sample = [gender_encoded_sample, height_sample]
    
    # Normalize the new sample using min and max values from the training set
    normalized_sample = [(sample[j] - min_values[j]) / (max_values[j] - min_values[j]) if max_values[j] != min_values[j] else 0 for j in range(len(sample))]
    
    new_prediction = model.predict([normalized_sample])[0]
    print(f"Predicted weight for new sample (Gender: {new_sample[0]}, Height: {new_sample[1]} cm): {new_prediction:.2f} kg")


Weights: [26.649913754479037, 114.44144216671833]
Bias: 92.28058853893783
MSE on test set: 114.13862956486709
Predicted weight for new sample (Gender: Male, Height: 170 cm): 654.40 kg


In [2]:
import csv
import random
from sklearn.preprocessing import LabelEncoder

# Linear Regression model class
class LinearRegression:
    def __init__(self, learning_rate=0.001, n_iterations=1000):  # Reduced learning rate
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        n_samples = len(X)
        n_features = len(X[0])
        self.weights = [0] * n_features
        self.bias = 0

        for _ in range(self.n_iterations):
            y_predicted = self.predict(X)

            dw = [0] * n_features
            db = 0
            for i in range(n_samples):
                error = y_predicted[i] - y[i]
                for j in range(n_features):
                    dw[j] += (1 / n_samples) * error * X[i][j]
                db += (1 / n_samples) * error

            for j in range(n_features):
                self.weights[j] -= self.learning_rate * dw[j]
            self.bias -= self.learning_rate * db

    def predict(self, X):
        return [sum(x[j] * self.weights[j] for j in range(len(self.weights))) + self.bias for x in X]

# Function to calculate mean squared error
def mean_squared_error(y_true, y_pred):
    return sum((y_true[i] - y_pred[i])**2 for i in range(len(y_true))) / len(y_true)

# Function to read CSV data
def read_csv(filename):
    with open(filename, 'r') as f:
        reader = csv.reader(f)
        header = next(reader)  # Skip the header row
        raw_data = list(reader)
    
    return header, raw_data

# Preprocessing function to encode Gender and convert numeric columns
def preprocess_data(raw_data):
    gender_encoder = LabelEncoder()
    genders = [row[0] for row in raw_data]
    gender_encoded = gender_encoder.fit_transform(genders)
    
    data = [[gender_encoded[i], float(row[1]), float(row[2])] for i, row in enumerate(raw_data)]
    return data, gender_encoder

# Function to split features and target
def split_features_target(data):
    X = [[row[0], row[1]] for row in data]  # Gender and Height as features
    y = [row[2] for row in data]  # Weight as target
    return X, y

# Function to normalize features
def normalize_data(X):
    n_features = len(X[0])
    min_values = [min(X[i][j] for i in range(len(X))) for j in range(n_features)]
    max_values = [max(X[i][j] for i in range(len(X))) for j in range(n_features)]
    
    normalized_X = []
    for row in X:
        normalized_row = [(row[j] - min_values[j]) / (max_values[j] - min_values[j]) if max_values[j] != min_values[j] else 0 for j in range(n_features)]
        normalized_X.append(normalized_row)
    
    print("Min values:", min_values)  # Debug: Check min values
    print("Max values:", max_values)  # Debug: Check max values
    
    return normalized_X, min_values, max_values

# Function to split data into training and testing sets
def train_test_split(X, y, test_size=0.2):
    combined = list(zip(X, y))
    random.shuffle(combined)
    X[:], y[:] = zip(*combined)
    split_index = int(len(X) * (1 - test_size))
    return X[:split_index], X[split_index:], y[:split_index], y[split_index:]

if __name__ == "__main__":
    # Read and preprocess data
    filename = "/kaggle/input/weight-height/weight-height.csv"  # Ensure this file is in the same directory
    header, raw_data = read_csv(filename)
    data, gender_encoder = preprocess_data(raw_data)
    
    # Split features and target
    X, y = split_features_target(data)
    
    # Normalize features
    X_normalized, min_values, max_values = normalize_data(X)
    
    # Split into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X_normalized, y)
    
    # Create and train the model
    model = LinearRegression(learning_rate=0.001, n_iterations=10000)  # Adjusted learning rate and iterations
    model.fit(X_train, y_train)
    
    # Make predictions on test set
    predictions = model.predict(X_test)
    
    print("Weights:", model.weights)
    print("Bias:", model.bias)
    print("MSE on test set:", mean_squared_error(y_test, predictions))

    # Predict weight for a new sample
    new_sample = ['Male', 170]  # Gender: Male, Height: 170 cm
    gender_encoded_sample = gender_encoder.transform([new_sample[0]])[0]
    height_sample = new_sample[1]
    sample = [gender_encoded_sample, height_sample]
    
    # Normalize the new sample using min and max values from the training set
    normalized_sample = [(sample[j] - min_values[j]) / (max_values[j] - min_values[j]) if max_values[j] != min_values[j] else 0 for j in range(len(sample))]
    
    new_prediction = model.predict([normalized_sample])[0]
    print(f"Predicted weight for new sample (Gender: {new_sample[0]}, Height: {new_sample[1]} cm): {new_prediction:.2f} kg")


Min values: [0, 54.2631333250971]
Max values: [1, 78.9987423463896]
Weights: [41.094459396365465, 62.63580714225108]
Bias: 110.22078875960217
MSE on test set: 194.603503119097
Predicted weight for new sample (Gender: Male, Height: 170 cm): 444.39 kg


In [1]:
import csv
import random
from sklearn.preprocessing import LabelEncoder

# Linear Regression model class
class LinearRegression:
    def __init__(self, learning_rate=0.01, n_iterations=1000):
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        n_samples = len(X)
        n_features = len(X[0])
        self.weights = [0] * n_features
        self.bias = 0

        for _ in range(self.n_iterations):
            y_predicted = self.predict(X)

            dw = [0] * n_features
            db = 0
            for i in range(n_samples):
                error = y_predicted[i] - y[i]
                for j in range(n_features):
                    dw[j] += (1 / n_samples) * error * X[i][j]
                db += (1 / n_samples) * error

            for j in range(n_features):
                self.weights[j] -= self.learning_rate * dw[j]
            self.bias -= self.learning_rate * db

    def predict(self, X):
        return [sum(x[j] * self.weights[j] for j in range(len(self.weights))) + self.bias for x in X]

# Function to calculate mean squared error
def mean_squared_error(y_true, y_pred):
    return sum((y_true[i] - y_pred[i])**2 for i in range(len(y_true))) / len(y_true)

# Function to read CSV data
def read_csv(filename):
    with open(filename, 'r') as f:
        reader = csv.reader(f)
        header = next(reader)  # Skip the header row
        raw_data = list(reader)
    
    return header, raw_data

# Preprocessing function to encode Gender and convert numeric columns
def preprocess_data(raw_data):
    gender_encoder = LabelEncoder()
    genders = [row[0] for row in raw_data]
    gender_encoded = gender_encoder.fit_transform(genders)
    
    data = [[gender_encoded[i], float(row[1]), float(row[2])] for i, row in enumerate(raw_data)]
    return data

# Function to split features and target
def split_features_target(data):
    X = [[row[0], row[1]] for row in data]  # Gender and Height as features
    y = [row[2] for row in data]  # Weight as target
    return X, y

# Function to normalize features
def normalize_data(X):
    n_features = len(X[0])
    min_values = [min(X[i][j] for i in range(len(X))) for j in range(n_features)]
    max_values = [max(X[i][j] for i in range(len(X))) for j in range(n_features)]
    
    normalized_X = []
    for row in X:
        normalized_row = [(row[j] - min_values[j]) / (max_values[j] - min_values[j]) if max_values[j] != min_values[j] else 0 for j in range(n_features)]
        normalized_X.append(normalized_row)
    
    return normalized_X, min_values, max_values

# Function to split data into training and testing sets
def train_test_split(X, y, test_size=0.2):
    combined = list(zip(X, y))
    random.shuffle(combined)
    X[:], y[:] = zip(*combined)
    split_index = int(len(X) * (1 - test_size))
    return X[:split_index], X[split_index:], y[:split_index], y[split_index:]

if __name__ == "__main__":
    # Read and preprocess data
    filename = "/kaggle/input/500-person-gender-height-weight-bodymassindex/500_Person_Gender_Height_Weight_Index.csv"  # Update with the path to your file
    header, raw_data = read_csv(filename)
    data = preprocess_data(raw_data)
    
    # Split features and target
    X, y = split_features_target(data)
    
    # Normalize features
    X_normalized, min_values, max_values = normalize_data(X)
    
    # Split into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X_normalized, y)
    
    # Create and train the model
    model = LinearRegression(learning_rate=0.01, n_iterations=10000)
    model.fit(X_train, y_train)
    
    # Make predictions on test set
    predictions = model.predict(X_test)
    
    print("Weights:", model.weights)
    print("Bias:", model.bias)
    print("MSE on test set:", mean_squared_error(y_test, predictions))

    # Predict weight for a new sample
    new_sample = [0, 170]  # Gender: 0 (Male), Height: 170 cm
    normalized_sample = [(new_sample[j] - min_values[j]) / (max_values[j] - min_values[j]) if max_values[j] != min_values[j] else 0 for j in range(len(new_sample))]
    new_prediction = model.predict([normalized_sample])[0]
    print(f"Predicted weight for new sample (Gender: Male, Height: 170 cm): {new_prediction:.2f} kg")


Weights: [0.6634581636881086, -0.6710660511780178]
Bias: 107.01357900725439
MSE on test set: 1027.3330981221616
Predicted weight for new sample (Gender: Male, Height: 170 cm): 106.67 kg
