## Import dependencies

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.preprocessing import LabelEncoder

## Import the dataset into a dataframe

In [None]:
# Load the insurance records from the CSV file in the working directory.
data = pd.read_csv('insurance.csv')

# Preview the first five rows to sanity-check the load.
data.head(n=5)

In [None]:
# Dimensions of the dataframe as a (rows, columns) tuple.
data.shape

In [None]:
# Print a summary of the dataframe: column names, non-null counts and dtypes.
data.info()

In [None]:
# Count the missing values in every column (isna is the canonical name
# for isnull; both return the same boolean mask).
data.isna().sum()

In [None]:
# Frequency of each distinct value in the 'sex' column.
data['sex'].value_counts()

In [None]:
# Frequency of each distinct value in the 'region' column.
data['region'].value_counts()

In [None]:
# Replace the 'sex' column with integer codes. LabelEncoder numbers the
# distinct values in sorted order, starting from 0.
encoder = LabelEncoder()
labels = encoder.fit_transform(data['sex'])
data['sex'] = labels

# Preview the dataframe after encoding.
data.head()

In [None]:
# Replace the 'region' column with integer codes. Calling fit_transform
# again refits the shared encoder, discarding its previous mapping.
labels = encoder.fit_transform(data['region'])
data['region'] = labels

# Preview the dataframe after encoding.
data.head()

In [None]:
# Replace the 'smoker' column with integer codes. Calling fit_transform
# again refits the shared encoder, discarding its previous mapping.
labels = encoder.fit_transform(data['smoker'])
data['smoker'] = labels

# Preview the dataframe after encoding.
data.head()

## Split the dataset into training and test sets

In [None]:
# Features are every column except the target; the target is 'charges'.
X = data.drop(columns=['charges'])
y = data['charges']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the
# partition identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=101,
)

# Report the shapes of the full, training and test partitions.
print(*(part.shape for part in (X, X_train, X_test)))
print(*(part.shape for part in (y, y_train, y_test)))

## Train the model

In [None]:
# Seed the forest so that bootstrap sampling and feature sub-sampling are
# reproducible; without a seed the fitted model and its score change on
# every run. 101 matches the seed used for the train/test split.
model = RandomForestRegressor(random_state=101)

# Fit on the training partition only.
model.fit(X_train, y_train)

# Predict charges for the held-out test rows.
y_pred = model.predict(X_test)

# Coefficient of determination (R^2) on the test set; displayed as the
# cell's output.
score = metrics.r2_score(y_test, y_pred)
score

## Building a prediction system

In [None]:
# One sample to score. The values must be listed in the same order as the
# training columns in X, using the integer codes produced by the encoding
# cells (presumably age, sex, bmi, children, smoker, region -- confirm
# against X.columns).
input_data = (19, 0, 27.9, 0, 1, 3)
input_data_array = np.asarray(input_data)

# Shape the sample as a single row and attach the training column names.
# The model was fitted on a DataFrame, so predicting on a bare ndarray makes
# scikit-learn warn about missing feature names and drops the guarantee that
# each value lines up with the right column.
input_data_reshaped = pd.DataFrame(
    input_data_array.reshape(1, -1),
    columns=X.columns,
)

prediction = model.predict(input_data_reshaped)

# predict() returns a one-element array for a single row; print the scalar
# itself rather than the array's bracketed repr.
print(f"Predicted Medical Insurance Cost: {prediction[0]:.2f}")

## Saving the trained model

In [None]:
import pickle

# Destination file for the serialized model.
filename = 'medical_insurance_cost_predictor_1.sav'

# Write inside a context manager so the file handle is flushed and closed
# even if pickling fails; a bare open() here would leak the handle.
# NOTE: only unpickle this file from a trusted source -- pickle.load can
# execute arbitrary code.
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)