In [2]:
import pandas as pd
import numpy as np


# Google Drive file IDs of the two CSV datasets
train_file_id = "1-e3oa8IAgmjkikjirxHObjDXpiY0g51i"  # per the original note: ID of Gotem_Pumpkins.csv
test_file_id = "1GMoK_4HOLIjCLuIdRD6pmLhSy1ymykQP"  # per the original note: ID of Freyja_Pumpkins.csv

# Both direct-access URLs share one prefix; only the file ID differs
_drive_prefix = "https://drive.google.com/uc?id="
train_file_url = _drive_prefix + train_file_id
test_file_url = _drive_prefix + test_file_id

# Read the training CSV first, then the test CSV, straight from their URLs
train_data, test_data = (pd.read_csv(url) for url in (train_file_url, test_file_url))

# Print the first rows of each frame as a quick check that loading worked
for heading, frame in (("Training Data:", train_data), ("\nTest Data:", test_data)):
    print(heading)
    print(frame.head())

# Separate features and target in the training data.
# The label column is never a feature, and neither is any "Unnamed: N" column:
# pandas gives that name to a column with an empty header, and the one in the
# training file counts 0, 1, 2, ... in the preview -- presumably a row index
# saved along with the data, so it carries no information about the pumpkin.
feature_columns = [
    col for col in train_data.columns
    if col != 'Class' and not str(col).startswith('Unnamed:')
]
X_train = train_data[feature_columns].values  # Convert to numpy array
y_train = train_data['Class'].values  # Target as numpy array

# Extract features from test data.
# Selecting by name keeps the test columns in exactly the training order and
# raises a KeyError if the test file lacks a feature the model is trained on,
# instead of silently pairing weights with the wrong columns.
X_test = test_data[feature_columns].values  # Convert to numpy array

# Extract target from test data if available
if 'Class' in test_data.columns:
    y_test = test_data['Class'].values
else:
    y_test = np.array([])  # Empty array if no labels

# Encode the class labels as integers; the codes are derived from the training labels only
unique_classes = np.unique(y_train)  # distinct training labels, in the order np.unique returns them
class_to_int = dict(zip(unique_classes, range(len(unique_classes))))  # label -> its position in unique_classes

# Look each label up in the mapping (a label missing from the mapping raises KeyError)
y_train_numeric = np.array(list(map(class_to_int.__getitem__, y_train)))
if y_test.size > 0:
    y_test_numeric = np.array(list(map(class_to_int.__getitem__, y_test)))
else:
    # No test labels were available, so keep an empty array
    y_test_numeric = np.array([])

# Standardize the data (feature scaling).
# Both sets are scaled with the training-set mean and standard deviation.
mean = X_train.mean(axis=0)
std = X_train.std(axis=0)
# A column that is constant in the training set has std == 0. Dividing by it
# yields NaN (0/0) for training rows and NaN/inf for test rows, and those
# values then flow into every dot product with the weights. Use a divisor of
# 1 for such columns so they simply become (value - mean).
std = np.where(std == 0, 1.0, std)
X_train = (X_train - mean) / std
X_test = (X_test - mean) / std

# Sigmoid function
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), element-wise.

    Parameters
    ----------
    z : scalar or array-like of numbers
        Input value(s).

    Returns
    -------
    Value(s) in [0, 1] with the same shape as ``z``.

    Notes
    -----
    The direct formula evaluates ``exp(-z)``, which overflows to ``inf`` (and
    emits a RuntimeWarning) once ``-z`` exceeds roughly 709 in float64.
    Here the same quantity is computed as ``exp(-log(1 + exp(-z)))`` with
    ``np.logaddexp(0, -z)`` supplying ``log(1 + exp(-z))`` without ever
    forming the large intermediate, so extreme inputs saturate to 0 or 1
    without an overflow.
    """
    z = np.asarray(z)
    return np.exp(-np.logaddexp(0, -z))

# Logistic regression training
def train_logistic_regression(X, y, lr=0.01, epochs=1000):
    """Fit logistic-regression parameters with full-batch gradient descent.

    Parameters
    ----------
    X : array whose ``shape`` unpacks into (rows, columns)
        Feature matrix.
    y : array-like
        Targets; subtracted element-wise from the sigmoid outputs.
    lr : float, default 0.01
        Step size applied to each gradient.
    epochs : int, default 1000
        Number of gradient-descent updates to perform.

    Returns
    -------
    (w, b)
        Weight vector with one entry per column of ``X``, and the bias.
    """
    n_samples, n_features = X.shape
    w = np.zeros(n_features)  # weights start at zero
    b = 0  # bias starts at zero

    for _epoch in range(epochs):
        # Difference between the current predicted probabilities and the targets
        residual = sigmoid(np.dot(X, w) + b) - y

        # Both gradients are averaged over the rows of X
        scale = 1 / n_samples
        grad_w = scale * np.dot(X.T, residual)
        grad_b = scale * np.sum(residual)

        # Step against the gradient
        w -= lr * grad_w
        b -= lr * grad_b

    return w, b

# Fit the model on the training set; the hyperparameters are named so they are easy to find and tune
LEARNING_RATE = 0.1
N_EPOCHS = 2000
w, b = train_logistic_regression(
    X_train,
    y_train_numeric,
    lr=LEARNING_RATE,
    epochs=N_EPOCHS,
)

# Predict function
def predict(X, w, b):
    """Return integer 0/1 predictions for the rows of ``X``.

    A row is labelled 1 when ``sigmoid(X·w + b)`` is at least 0.5 and 0 otherwise.
    """
    probabilities = sigmoid(np.dot(X, w) + b)
    is_positive = probabilities >= 0.5  # boolean mask at the 0.5 threshold
    return is_positive.astype(int)

# Predict on the test data
y_pred = predict(X_test, w, b)

# Evaluate or display predictions
if y_test_numeric.size > 0:
    # Labels are available: report the share of predictions that match them
    accuracy = np.mean(y_pred == y_test_numeric)
    print(f"Accuracy on test data: {accuracy * 100:.2f}%")
else:
    # No labels: show the predictions as class names rather than integer codes.
    # Each code is the label's position in unique_classes (that is how
    # class_to_int was built), so indexing with the codes reverses the mapping.
    print("Predicted Pumpkin Types for the test dataset:")
    print(unique_classes[y_pred])




Training Data:
   Unnamed: 0    Area  Perimeter  Major_Axis_Length  Minor_Axis_Length  \
0           0   84930   1204.448           508.7883           213.0266   
1           1  103343   1275.261           521.4694           252.8941   
2           2   98350   1269.539           526.2346           238.4702   
3           3   69517   1059.538           429.8407           206.8250   
4           4   80011   1182.947           501.9065           204.7531   

   Convex_Area  Equiv_Diameter  Eccentricity  Solidity  Extent  Roundness  \
0        85665        328.8407        0.9081    0.9914  0.5482     0.7357   
1       104156        362.7401        0.8745    0.9922  0.7457     0.7985   
2        99184        353.8688        0.8914    0.9916  0.5919     0.7668   
3        70493        297.5093        0.8766    0.9862  0.6623     0.7782   
4        80902        319.1758        0.9130    0.9890  0.7374     0.7185   

   Aspect_Ration  Compactness          Class  
0         2.3884       0.6463 