In [None]:
# Mount Google Drive into the Colab runtime at /content/drive so that the
# dataset stored under MyDrive can be read with ordinary file paths.
from google.colab import drive
drive.mount(mountpoint='/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
from google.colab import drive


import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline



# Read the cleaned CSV from the mounted Drive folder into a DataFrame.
csv_path = '/content/drive/MyDrive/Colab Notebooks/meow_cleaned.csv'
df_train = pd.read_csv(csv_path)

In [None]:
# Keep only the rows that carry a target value: a row whose 'AdoptionSpeed'
# is missing cannot be used for supervised training or evaluation.
has_target = df_train['AdoptionSpeed'].notna()
df_train = df_train[has_target]

# Features and target

In [None]:
# Columns fed to the model as inputs; every other column of df_train is left out.
feature_columns = [
    'Type', 'Age', 'Breed1', 'Breed2', 'Gender',
    'Color1', 'Color2', 'Color3',
    'MaturitySize', 'FurLength',
    'Vaccinated', 'Dewormed', 'Sterilized', 'Health',
    'Quantity', 'Fee', 'State', 'VideoAmt', 'PhotoAmt',
]
# Column the classifier learns to predict.
target_column = 'AdoptionSpeed'

X = df_train.loc[:, feature_columns]
y = df_train[target_column]


#Preprocessing Pipeline

In [None]:

# Preprocessing step: fill in missing values and one-hot encode the
# categorical columns. The two column groups are handled separately.
numeric_features = ['Age', 'Quantity', 'Fee', 'VideoAmt', 'PhotoAmt']
categorical_features = [
    'Type', 'Breed1', 'Breed2', 'Gender',
    'Color1', 'Color2', 'Color3',
    'MaturitySize', 'FurLength',
    'Vaccinated', 'Dewormed', 'Sterilized', 'Health', 'State',
]

# Numeric branch: replace missing entries with the column mean.
numeric_transformer = SimpleImputer(strategy='mean')

# Categorical branch: replace missing entries with the most frequent value,
# then one-hot encode. handle_unknown='ignore' keeps transform() from failing
# on categories that were not present when the encoder was fitted.
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', OneHotEncoder(handle_unknown='ignore')),
])

preprocessor = ColumnTransformer(transformers=[
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
])


In [None]:

# End-to-end model: preprocessing -> scaling -> elastic-net logistic regression.
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    # Scale to unit variance without centering (with_mean=False); presumably
    # chosen because the one-hot encoded output is sparse and centering a
    # sparse matrix is not supported -- confirm if the preprocessor changes.
    ('scaler', StandardScaler(with_mean=False)),
    ('classifier', LogisticRegression(
        max_iter=5000,
        solver='saga',          # the solver that supports the elastic-net penalty
        penalty='elasticnet',
        l1_ratio=0.5,           # equal mix of L1 and L2 regularisation
        # 'saga' shuffles the data while optimising; pin the seed so repeated
        # runs give the same coefficients and metrics. Same seed value as the
        # train/test split.
        random_state=42,
    )),
])


# Splitting dataset into training and testing sets


In [None]:


# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# repeatable.
#
# X and y are the feature matrix and target built in the "Features and target"
# cell. X is intentionally NOT rebuilt from every column of df_train here:
# doing so would silently discard the explicit feature selection and carry
# unused columns through the split, even though the preprocessor only ever
# reads the listed feature columns.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:

# Fit the pipeline on the training split, then predict labels for the
# held-out rows. fit() returns the fitted pipeline, so the calls chain.
y_pred = pipeline.fit(X_train, y_train).predict(X_test)

In [None]:
# Score the held-out predictions: per-class precision/recall/F1 first, then
# overall accuracy as a raw fraction and as a percentage.
test_accuracy = accuracy_score(y_test, y_pred)
accuracy_percentage = test_accuracy * 100

print(classification_report(y_test, y_pred))
print(test_accuracy)
print(f"Accuracy: {accuracy_percentage:.2f}%")

              precision    recall  f1-score   support

           0       0.20      0.03      0.04        79
           1       0.35      0.30      0.33       620
           2       0.34      0.42      0.37       811
           3       0.34      0.17      0.23       644
           4       0.46      0.62      0.53       839

    accuracy                           0.39      2993
   macro avg       0.34      0.31      0.30      2993
weighted avg       0.37      0.39      0.37      2993

0.38623454727697965
Accuracy: 38.62%
