In [6]:
# Supervised Learning -- Classification

from sklearn.linear_model import LogisticRegression
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Toy weather dataset: each row of X is one observation scored as
# (cloudiness, windiness); y holds the matching label (0 = no rain, 1 = rain).
# Working assumption: scores of 0-5 make rain less likely, 6-10 more likely.

X = np.array([
    [1, 2], [2, 3], [3, 1], [5, 2],  # labelled 0 (no rain)
    [7, 8], [8, 7], [9, 9], [6, 6],  # labelled 1 (rain)
])
y = np.array([0] * 4 + [1] * 4)

# Tabular view of the features with the label alongside, for inspection.
df = pd.DataFrame(X, columns=['cloudiness', 'windiness']).assign(rain=y)
df

Unnamed: 0,cloudiness,windiness,rain
0,1,2,0
1,2,3,0
2,3,1,0
3,5,2,0
4,7,8,1
5,8,7,1
6,9,9,1
7,6,6,1


In [9]:
# Step 1 - fit a logistic regression classifier on the sample observations (X, y).
# Step 2 - ask the fitted classifier whether an unseen observation means rain.

model = LogisticRegression().fit(X, y)  # fit() hands back the fitted estimator

# One unseen observation: cloudiness = 4, windiness = 6.
new_data = np.array([[4, 6]])
prediction = model.predict(new_data)

# predict() yields one label per input row; show the label for our single row.
prediction[0]

1

The Logistic Function: The logistic function, also known as the sigmoid function, is given by:

$$\sigma(z) = \frac{1}{1 + e^{-z}}$$

where $e$ is the base of the natural logarithm and $z$ is a linear combination of the input features.

Linear Combination of Inputs: For our example with cloudiness and windiness as features, the linear combination $z$ can be represented as:

$$z = b + w_1 \cdot \text{cloudiness} + w_2 \cdot \text{windiness}$$


Here, $b$ is the bias term (intercept), and $w_1$ and $w_2$ are the weights (coefficients) assigned to the cloudiness and windiness features, respectively. These weights are what the model learns during training.

Making Predictions: To predict whether it will rain or not, we calculate the value of $z$ using the weights and bias learned by the model. Then, we apply the logistic function to $z$ to get a probability:

$$\text{probability of rain} = \sigma(z) = \frac{1}{1 + e^{-(b + w_1 \cdot \text{cloudiness} + w_2 \cdot \text{windiness})}}$$

Interpreting the Output: The output of the logistic function is a probability between 0 and 1. If this probability is greater than a certain threshold (usually 0.5), we predict "rain" (1); otherwise, we predict "no rain" (0).

In [10]:
# Pull the fitted parameters out of the model: the first row of coef_ holds the
# per-feature weights, and the first entry of intercept_ is the bias term.
weights, bias = model.coef_[0], model.intercept_[0]

print("Weights:", weights)
print("Bias Term:", bias)

# Observation to score by hand
cloudiness, windiness = 4, 6

# Linear score: z = b + w1 * cloudiness + w2 * windiness
w_cloud, w_wind = weights
z = bias + w_cloud * cloudiness + w_wind * windiness

# Pass z through the logistic (sigmoid) function to turn it into a probability
probability_of_rain = 1.0 / (1.0 + np.exp(-z))

print("Probability of raining:", probability_of_rain)

Weights: [0.53298223 0.82509307]
Bias Term: -6.404848430627816
Probability of raining: 0.6632115264942609


# What is Churn Prediction?


Churn prediction is the analytical study of how likely a customer is to abandon a product or service. The goal is to understand why customers leave and to take steps to retain them before they give up the product or service.

In [14]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the churn dataset from a CSV file (path is relative to the working
# directory), then show its (rows, columns) shape.
df1 = pd.read_csv("churn_prediction.csv")
df1.shape

(7043, 21)

In [15]:
# Preview the first five rows of the dataset.
df1.head(5)

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [16]:
# Preview the last five rows of the dataset.
df1.tail(5)

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
7038,6840-RESVB,Male,0,Yes,Yes,24,Yes,Yes,DSL,Yes,...,Yes,Yes,Yes,Yes,One year,Yes,Mailed check,84.8,1990.5,No
7039,2234-XADUH,Female,0,Yes,Yes,72,Yes,Yes,Fiber optic,No,...,Yes,No,Yes,Yes,One year,Yes,Credit card (automatic),103.2,7362.9,No
7040,4801-JZAZL,Female,0,Yes,Yes,11,No,No phone service,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.6,346.45,No
7041,8361-LTMKD,Male,1,Yes,No,4,Yes,Yes,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Mailed check,74.4,306.6,Yes
7042,3186-AJIEK,Male,0,No,No,66,Yes,No,Fiber optic,Yes,...,Yes,Yes,Yes,Yes,Two year,Yes,Bank transfer (automatic),105.65,6844.5,No


In [19]:
# Print each column's name, non-null count, and dtype.
df1.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 21 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   customerID        7043 non-null   object 
 1   gender            7043 non-null   object 
 2   SeniorCitizen     7043 non-null   int64  
 3   Partner           7043 non-null   object 
 4   Dependents        7043 non-null   object 
 5   tenure            7043 non-null   int64  
 6   PhoneService      7043 non-null   object 
 7   MultipleLines     7043 non-null   object 
 8   InternetService   7043 non-null   object 
 9   OnlineSecurity    7043 non-null   object 
 10  OnlineBackup      7043 non-null   object 
 11  DeviceProtection  7043 non-null   object 
 12  TechSupport       7043 non-null   object 
 13  StreamingTV       7043 non-null   object 
 14  StreamingMovies   7043 non-null   object 
 15  Contract          7043 non-null   object 
 16  PaperlessBilling  7043 non-null   object 


In [21]:
# Count missing (NaN) values in each column.
df1.isna().sum()

customerID          0
gender              0
SeniorCitizen       0
Partner             0
Dependents          0
tenure              0
PhoneService        0
MultipleLines       0
InternetService     0
OnlineSecurity      0
OnlineBackup        0
DeviceProtection    0
TechSupport         0
StreamingTV         0
StreamingMovies     0
Contract            0
PaperlessBilling    0
PaymentMethod       0
MonthlyCharges      0
TotalCharges        0
Churn               0
dtype: int64

In [23]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns.
# NOTE(review): TotalCharges does not appear in this summary, which suggests it
# was read as a non-numeric column - confirm with df1.dtypes before modelling.
df1.describe()

Unnamed: 0,SeniorCitizen,tenure,MonthlyCharges
count,7043.0,7043.0,7043.0
mean,0.162147,32.371149,64.761692
std,0.368612,24.559481,30.090047
min,0.0,0.0,18.25
25%,0.0,9.0,35.5
50%,0.0,29.0,70.35
75%,0.0,55.0,89.85
max,1.0,72.0,118.75
