# Battery Efficiency Analysis Project
Consolidated Jupyter Notebook

In [None]:
import pandas as pd
# Load the enriched battery measurements from a CSV located relative to the
# notebook's working directory, then preview the first rows.
df = pd.read_csv(filepath_or_buffer='battery_efficiency_data_enriched.csv')
df.head(n=5)

## Feature Engineering

In [None]:
# Drop the raw 'Battery Life' string field; the target used below is the
# 'Battery Life (min)' column.
# errors='ignore' makes this cell safe to re-run: the drop is in place, so on
# a second execution the column is already gone and would otherwise raise
# KeyError.
df.drop(columns=['Battery Life'], inplace=True, errors='ignore')

# Features: every remaining column except the target and the measurement date.
X = df.drop(columns=['Battery Life (min)', 'Date of Measurement'])
# Target: the 'Battery Life (min)' column.
y = df['Battery Life (min)']

## Machine Learning Models

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Columns holding category labels are one-hot encoded; every other feature
# column is standardized.
# NOTE(review): assumes all non-categorical columns in X are numeric —
# StandardScaler will fail otherwise; confirm against the dataset.
categorical = ['Device Model', 'Operating System Version', 'Task Type']
numerical = [col for col in X.columns if col not in categorical]

# handle_unknown='ignore' lets the encoder transform categories at predict
# time that were not present in the training split instead of raising.
preprocessor = ColumnTransformer([
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical),
    ('num', StandardScaler(), numerical)
])

# Candidate regressors compared on the same split; seeds are fixed so the
# tree ensembles give reproducible results.
models = {
    'Linear Regression': LinearRegression(),
    'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42),
    'Gradient Boosting': GradientBoostingRegressor(n_estimators=100, random_state=42)
}

# Hold out 20% of the rows for evaluation, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit each model behind the shared preprocessor and collect test-set metrics.
results = []
for name, model in models.items():
    # The same preprocessor object is reused and refit on X_train each pass.
    pipeline = Pipeline([('preprocessor', preprocessor), ('model', model)])
    pipeline.fit(X_train, y_train)
    pred = pipeline.predict(X_test)
    # RMSE is taken as sqrt(MSE) instead of passing squared=False: that
    # keyword was deprecated in scikit-learn 1.4 and removed in 1.6, so the
    # explicit square root works across versions.
    rmse = mean_squared_error(y_test, pred) ** 0.5
    results.append({
        'Model': name,
        'MAE': mean_absolute_error(y_test, pred),
        'RMSE': rmse,
        'R2 Score': r2_score(y_test, pred)
    })
# Redundant re-import (pandas is already imported in the first cell); harmless.
import pandas as pd
# One row per model with its MAE, RMSE and R2 score.
pd.DataFrame(results)