In [1]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from datetime import datetime
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [2]:
# Load the dataset
df = pd.read_csv(r'/Users/snrt1/Desktop/TV_Laayoune_Data.csv')

In [3]:
df

Unnamed: 0.1,Unnamed: 0,Program,Genre,Number of viewers,Audience share (%),Start time,Broadcasting date
0,0,Hiraf,Capsule,163000,22.0,21:15:00,2023-03-21
1,1,Midadou chouaara,Culturel,868000,13.8,06:17:00,2023-03-20
2,2,Fada Ryadi,Sport,681000,9.7,07:10:00,2023-03-19
3,3,Journal Télévisé,Info,615000,44.9,22:30:00,2023-03-18
4,4,Agenda,Info,868000,36.8,19:39:00,2023-03-17
...,...,...,...,...,...,...,...
1012,1012,alfich,Culturel,17570,23.0,12:40:00,2022-03-10
1013,1013,sibak kawafi,Culturel,27336,20.0,06:35:00,2022-03-27
1014,1014,Quiz Quran,Religieux,26035,38.0,17:53:00,2022-02-10
1015,1015,Maa Naha,Culturel,17691,31.0,19:15:00,2022-05-12


In [4]:
del df["Unnamed: 0"]

In [5]:
df

Unnamed: 0,Program,Genre,Number of viewers,Audience share (%),Start time,Broadcasting date
0,Hiraf,Capsule,163000,22.0,21:15:00,2023-03-21
1,Midadou chouaara,Culturel,868000,13.8,06:17:00,2023-03-20
2,Fada Ryadi,Sport,681000,9.7,07:10:00,2023-03-19
3,Journal Télévisé,Info,615000,44.9,22:30:00,2023-03-18
4,Agenda,Info,868000,36.8,19:39:00,2023-03-17
...,...,...,...,...,...,...
1012,alfich,Culturel,17570,23.0,12:40:00,2022-03-10
1013,sibak kawafi,Culturel,27336,20.0,06:35:00,2022-03-27
1014,Quiz Quran,Religieux,26035,38.0,17:53:00,2022-02-10
1015,Maa Naha,Culturel,17691,31.0,19:15:00,2022-05-12


# To check the Audience share (%)

In [6]:
# define the categorical columns
categorical_cols = ['Program', 'Genre', 'Start time', 'Broadcasting date']

# create the transformer to encode categorical columns
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)
    ])

# encode the categorical columns and fit the model
X = preprocessor.fit_transform(df[categorical_cols])
y = df['Audience share (%)']
model = LinearRegression().fit(X, y)
# define the input variables
input_variables = {
    'Program': 'Hiraf',
    'Genre': 'Capsule',
    'Start time': '21:15:56',
    'Broadcasting date': '21/03/2023'
}

# encode the input variables and make the prediction
X_input = preprocessor.transform(pd.DataFrame(input_variables, index=[0]))
y_pred = model.predict(X_input)

print('Predicted audience share (%):', y_pred[0])

Predicted audience share (%): 27.41029531410358


# TO check the Number of viewers

In [7]:
# define the categorical columns
categorical_cols = ['Program', 'Genre', 'Start time', 'Broadcasting date']

# create the transformer to encode categorical columns
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)
    ])

# encode the categorical columns and fit the model
X = preprocessor.fit_transform(df[categorical_cols])
y = df['Number of viewers']
model = LinearRegression().fit(X, y)
# define the input variables
input_variables = {
    'Program': 'Hiraf',
    'Genre': 'Capsule',
    'Start time': '21:15:56',
    'Broadcasting date': '21/03/2023'
}

# encode the input variables and make the prediction
X_input = preprocessor.transform(pd.DataFrame(input_variables, index=[0]))
y_pred = model.predict(X_input)

print('Number of viewers:', y_pred[0])

Number of viewers: 289.0019429121385


In [9]:
# Convert the 'Start time' column to seconds
df['Start time'] = pd.to_timedelta(df['Start time']).dt.total_seconds() / 3600

# Convert the 'Broadcasting date' column to datetime format
df['Broadcasting date'] = pd.to_datetime(df['Broadcasting date'])

# Extract the year, month, and day from the 'Broadcasting date' column
df['Year'] = df['Broadcasting date'].dt.year
df['Month'] = df['Broadcasting date'].dt.month
df['Day'] = df['Broadcasting date'].dt.day

# Convert 'Program' and 'Genre' columns to numeric values
label_encoder = LabelEncoder()
df['Program'] = label_encoder.fit_transform(df['Program'])
df['Genre'] = label_encoder.fit_transform(df['Genre'])

# Extract the features and target variables
X = df[['Number of viewers', 'Audience share (%)', 'Start time', 'Year', 'Month', 'Day']]
y = df[['Program', 'Genre']]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build the linear regression model
model = LinearRegression()

# Train the model on the training data
model.fit(X_train, y_train) 

In [10]:
# Define a new data point

new_data_point = pd.DataFrame({
    'Number of viewers': [1000000],
    'Audience share (%)': [30],
    'Start time': [20.5],
    'Year': [2023],
    'Month': [4],
    'Day': [3]
})


# Make a prediction for the new data point
prediction = model.predict(new_data_point)

# Print the predicted values for 'Program' and 'Genre'
print(prediction)

[[6.71659566 2.1286203 ]]
