In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount and echo the full path of every file found,
# one path per line.
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _subfolders, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for file_path in input_file_paths:
    print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

### 1 Data Preprocessing: The dataset is loaded and preprocessed.

### 2 Model Training: Two models are trained (Linear Regression and Random Forest).

### 3 Evaluation: The models are evaluated using metrics such as Mean Squared Error (MSE) and R².

### Visualizations:
* Histogram of RUL distribution.
* Heatmap showing feature correlation.
* Scatter plot comparing actual and predicted RUL.

### Step 1: Importing the necessary libraries


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns

### Step 2: Load the dataset


In [None]:
# Location of the battery RUL CSV inside the Kaggle input mount.
BATTERY_RUL_CSV = '/kaggle/input/battery-remaining-useful-life-rul/Battery_RUL.csv'
data = pd.read_csv(BATTERY_RUL_CSV)

In [None]:
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None" line.
data.info()

In [None]:
# Report the dimensions of the loaded frame as a (rows, columns) tuple.
row_count, column_count = data.shape
print((row_count, column_count))

In [None]:
# List the dtype pandas inferred for each column.
column_dtypes = data.dtypes
print(column_dtypes)

In [None]:
# Preview the top of the dataset (head() returns the first five rows by default)
leading_rows = data.head()
print(leading_rows)

In [None]:
# Count the missing entries in each column
missing_per_column = data.isna().sum()
print(missing_per_column)

### Step 3: Data Preprocessing (fill in missing values, drop irrelevant columns, etc.)
Example: rows containing null values are dropped here for simplicity.

In [None]:
# Keep only fully populated rows: any row with at least one missing value is
# discarded (these are dropna's default axis/how settings, spelled out).
data = data.dropna(axis=0, how='any')

### Step 4: Splitting the data into features (X) and target (y)


In [None]:
# 'RUL' is assumed to be the target variable; every other column is a feature.
target_column = 'RUL'
y = data[target_column]
X = data.drop(columns=[target_column])

### Step 5: Train-test split


In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

### Step 6: Training models

## Linear Regression Model


In [None]:
# fit() returns the estimator itself, so construction and training are chained.
lr_model = LinearRegression().fit(X_train, y_train)
# Leave the fitted estimator as the cell's final expression so it is displayed.
lr_model

## Random Forest Model


In [None]:
# Seed the forest so the randomness used while building the trees (e.g. the
# bootstrap sampling) -- and hence the reported metrics -- is reproducible
# across runs. The seed matches the one used for the train/test split.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

### Step 7: Model Evaluation

## Predictions


In [None]:
# Score the held-out features with each fitted model (linear regression first,
# then random forest).
y_pred_lr, y_pred_rf = (
    fitted.predict(X_test) for fitted in (lr_model, rf_model)
)

## Metrics for Linear Regression



In [None]:
# Score the linear-regression predictions against the true test targets:
# mean squared error first, then R².
mse_lr, r2_lr = (
    metric(y_test, y_pred_lr) for metric in (mean_squared_error, r2_score)
)

## Metrics for Random Forest


In [None]:
# Same two metrics (mean squared error, then R²) for the random-forest predictions.
mse_rf, r2_rf = (
    metric(y_test, y_pred_rf) for metric in (mean_squared_error, r2_score)
)

# Report both models in a single call, one line per model.
print(
    f"Linear Regression MSE: {mse_lr}, R²: {r2_lr}",
    f"Random Forest MSE: {mse_rf}, R²: {r2_rf}",
    sep="\n",
)

### Step 8: Visualizations

### 1. Distribution of RUL



In [None]:
# Histogram of the target column with a kernel-density curve overlaid.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(data['RUL'], kde=True, ax=ax)
ax.set_title('RUL Distribution')
plt.show()

### 2. Feature Correlation Heatmap


In [None]:
# Pairwise correlation between columns, annotated with the coefficient values.
# Only numeric columns are correlated: since pandas 2.0, DataFrame.corr() no
# longer drops non-numeric columns silently and raises on them instead.
numeric_data = data.select_dtypes(include='number')
plt.figure(figsize=(10,6))
sns.heatmap(numeric_data.corr(), annot=True)
plt.title('Feature Correlation Heatmap')
plt.show()

### 3. Actual vs Predicted (Random Forest)


In [None]:
# Predicted vs. actual RUL for the random forest; the red diagonal marks where
# a perfect prediction would fall.
rul_low, rul_high = min(y_test), max(y_test)
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y_test, y_pred_rf)
ax.plot([rul_low, rul_high], [rul_low, rul_high], color='red', lw=2)
ax.set_title('Actual vs Predicted RUL (Random Forest)')
ax.set_xlabel('Actual RUL')
ax.set_ylabel('Predicted RUL')
plt.show()

## Key Points:

* Linear Regression and Random Forest models are trained.

* Mean Squared Error (MSE) and R² scores are calculated to evaluate the models.

* Three simple plots are generated to visualize the RUL distribution, feature correlations, and model performance.