In [2]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Define heights and weights arrays correctly
heights = np.array([150, 165, 170, 180, 175, 160, 185, 155, 172, 168])
weights = np.array([45, 52, 60, 72, 68, 50, 78, 48, 62, 85])

heights = heights.reshape(-1, 1)
x_train, x_test, y_train, y_test = train_test_split(heights, weights, test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(x_train, y_train)

In [3]:
y_predicted = model.predict(x_test)
print(y_predicted)

[67.26573175 60.45115666]


In [5]:
new_height = 175
predicted_weight = model.predict([[new_height]])
predicted_weight

array([70.18626393])

In [6]:
print("Predicted weight for", new_height, "cm is", predicted_weight[0], "kg")

Predicted weight for 175 cm is 70.18626393383676 kg


## Logistic Regression : Customer will default on their loan

### Step 1 : Load the data

In [10]:
import pandas as pd

data = pd.read_csv('loan_data.csv')
print(data.head())

   age  income  loan_amount  credit_score  default
0   25   50000        20000           700        0
1   45   80000        25000           710        0
2   35   60000        30000           680        1
3   50  120000        10000           720        0
4   23   40000        15000           650        1


### Step 2 : Explore the data

In [11]:
print(data.describe())

            age         income   loan_amount  credit_score    default
count  10.00000      10.000000     10.000000     10.000000  10.000000
mean   37.10000   74200.000000  21200.000000    694.000000   0.300000
std    10.68176   26114.278768   6762.642482     29.135698   0.483046
min    23.00000   40000.000000  10000.000000    650.000000   0.000000
25%    29.50000   56250.000000  16250.000000    680.000000   0.000000
50%    35.50000   68500.000000  21000.000000    695.000000   0.000000
75%    44.25000   87500.000000  25750.000000    707.500000   0.750000
max    55.00000  120000.000000  30000.000000    750.000000   1.000000


### Step 3 : Process the data

In [12]:
print(data.isnull().sum())

age             0
income          0
loan_amount     0
credit_score    0
default         0
dtype: int64


In [13]:
X = data[['age', 'income', 'loan_amount', 'credit_score']]
y = data['default']

### Step 4 : Split the data into training and testing sets

In [15]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [18]:
import warnings

warnings.filterwarnings('ignore')
from sklearn.linear_model import LogisticRegression

model = LogisticRegression()
model.fit(x_train, y_train)

### Step 6 : Evaluate the model

In [20]:
from sklearn.metrics import accuracy_score, precision_score, recall_score

y_pred = model.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)

Accuracy: 1.0


In [21]:
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
print('Precision:', precision)
print('Recall:', recall)

Precision: 0.0
Recall: 0.0


#### Task : Predicting Customer Churn: Given a set of customer features (age, subscription length, usage patterns, etc.), predict whether a customer will churn (cancel their service) or not. This is a binary classification problem (churn/no churn).

### Step 1 : Load the data

In [23]:
import pandas as pd

data = pd.read_csv('customer_churn_data.csv')
print(data)

    customer_id  age  gender  subscription_length  total_usage  \
0             1   25    Male                   12          500   
1             2   38  Female                   24         1200   
2             3   42    Male                    6          300   
3             4   28  Female                   18          900   
4             5   35    Male                   36         1800   
5             6   50  Female                   12          600   
6             7   22    Male                    3          150   
7             8   45  Female                   24         1500   
8             9   31    Male                   15          750   
9            10   27  Female                    6          400   
10           11   39    Male                   30         2000   
11           12   48  Female                   18         1000   
12           13   29    Male                    9          550   
13           14   33  Female                   24         1400   
14        

### Step 2 : Explore the data

In [24]:
print(data.describe())

       customer_id        age  subscription_length  total_usage  \
count     50.00000  50.000000            50.000000    50.000000   
mean      25.50000  35.720000            17.200000  1043.000000   
std       14.57738   9.644793            10.654117   738.587317   
min        1.00000  21.000000             2.000000   100.000000   
25%       13.25000  28.000000             9.000000   500.000000   
50%       25.50000  33.500000            15.000000   800.000000   
75%       37.75000  43.750000            24.000000  1575.000000   
max       50.00000  55.000000            40.000000  2800.000000   

       monthly_charges  
count        50.000000  
mean         52.200000  
std          36.869322  
min           5.000000  
25%          25.000000  
50%          40.000000  
75%          78.750000  
max         140.000000  


### Step 3 : Process the data

In [25]:
print(data.isnull().sum())

customer_id            0
age                    0
gender                 0
subscription_length    0
total_usage            0
monthly_charges        0
contract_type          0
churn                  0
dtype: int64


### Step 4 : Split the data into training and testing sets

In [26]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### Step 5 : Train the model

In [27]:
import warnings

warnings.filterwarnings('ignore')
from sklearn.linear_model import LogisticRegression

model = LogisticRegression()
model.fit(x_train, y_train)

### Step 6 : Evaluate the model

In [28]:
from sklearn.metrics import accuracy_score, precision_score, recall_score

y_pred = model.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)

Accuracy: 1.0


In [29]:
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
print('Precision:', precision)
print('Recall:', recall)

Precision: 0.0
Recall: 0.0
