<a href="https://colab.research.google.com/github/JuliustheCreator/channel-growth-model/blob/main/analysis/youtube_models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### **Importing Modules**


---



In [None]:
import pandas as pd
import tensorflow as tf
import pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from google.colab import files

#### **Importing Cleaned Dataset**


---



In [None]:
# Prompt for the cleaned dataset produced by the data-cleaning notebook.
uploaded = files.upload()

# NOTE(security): unpickling executes arbitrary code embedded in the file.
# Only upload a cleaned_data.pkl that you generated yourself.
if uploaded:
    # files.upload() returns a {filename: file bytes} mapping. Deserialize those
    # bytes directly rather than re-opening "cleaned_data.pkl" from disk: when a
    # file with that name already exists in the session, Colab stores the new
    # upload under a different name (e.g. "cleaned_data (1).pkl"), so the
    # hard-coded path would silently load the stale copy.
    payload = uploaded.get("cleaned_data.pkl", next(iter(uploaded.values())))
    df = pickle.loads(payload)
else:
    # Nothing was uploaded (e.g. the dialog was cancelled): fall back to a copy
    # already present in the working directory.
    with open("cleaned_data.pkl", "rb") as f:
        df = pickle.load(f)


Saving cleaned_data.pkl to cleaned_data.pkl


### **Cleaning Dataset Further for Model**

In [None]:
# Removing channels (rows) where there are no videos or no views
df = df.drop(df[(df['Video Views'] == 0) | (df['Video Count'] == 0)].index)

# Augmenting dataset (adding dummy channels)
# The dummy rows are all-zero channels (0 subscribers / views / videos) with a
# random age, added so the model sees examples anchored at the origin.
DUMMY_CHANNEL_COUNT = 250
AUGMENT_SEED = 220  # fixed seed so Restart & Run All reproduces the same rows

# Age statistics are taken ONCE from the real channels. Recomputing them inside
# a loop (after dummy rows were appended) would let earlier dummy ages shift
# the distribution that later ones are drawn from.
age_mean = df['Age'].mean()
age_std = df['Age'].std()

# Randomizing age around the mean
rng = np.random.default_rng(AUGMENT_SEED)
dummy_ages = rng.normal(loc = age_mean, scale = age_std, size = DUMMY_CHANNEL_COUNT)
# A normal draw can fall below zero, which is not a valid age; clamp at 0, then
# truncate to whole numbers (same truncation as int()).
dummy_ages = np.clip(dummy_ages, 0, None).astype(int)

# Build every dummy row in a single frame and concatenate once, instead of
# growing df row by row (which copies the whole frame on every iteration).
dummy_channels = pd.DataFrame({
    'Youtube Channel': ['Dummy Channel'] * DUMMY_CHANNEL_COUNT,
    'Subscribers': 0,
    'Video Views': 0,
    'Video Count': 0,
    'Age': dummy_ages,
})

df = pd.concat([df, dummy_channels], ignore_index = True)

### **Building the Regression Model (Neural Network)**


---



In [None]:
# Selecting required columns
X = df[['Video Views', 'Video Count', 'Age']]
y = df['Subscribers']

# Splitting the data into training and testing sets
# The split happens BEFORE scaling so that the held-out rows cannot influence
# the scaler's min/max (fitting the scaler on all rows leaks test-set
# information into training and makes the test error optimistic).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 220)

# Scaling the features
# Fit on the training rows only, then apply the same transform to the test rows.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Creating Model
# Small fully connected regressor: 3 scaled inputs -> 64 -> 32 -> 1 linear output.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation = 'relu', input_shape = [X_train.shape[1]]),
    tf.keras.layers.Dense(32, activation = 'relu'),
    tf.keras.layers.Dense(1)
])

### **Training the Model**


---



In [None]:
# Regression objective: minimise mean squared error with the Adam optimizer.
model.compile(loss = 'mean_squared_error', optimizer = 'adam')

# 20% of the training rows are held back by Keras as a validation set;
# the returned History object keeps the per-epoch losses.
history = model.fit(
    x = X_train,
    y = y_train,
    batch_size = 50,
    epochs = 100,
    validation_split = 0.2,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

### **Evaluating the Model**


---



In [None]:
# Score the trained network on the held-out test split. The model was compiled
# with a single loss and no extra metrics, so evaluate() yields one scalar.
loss = model.evaluate(x = X_test, y = y_test)

print(f"Mean squared error: {loss}")

Mean squared error: 45.37457275390625


### **Exporting and Testing Model**


---



In [None]:
# To Test the Model with Custom Values
# The raw inputs are rescaled before being fed to the scaler.
# NOTE(review): this assumes the dataset stores views in millions and video
# count in thousands — confirm against the cleaning notebook.
views = float(input('Input Video Views: ')) / 1_000_000
videos = float(input('Input Video Count: ')) / 1000
age = float(input('Input Age of Channel: '))

# Prediction
# Pass a one-row frame with the training column names (same order as training)
# so the scaler does not warn about missing feature names.
custom_channel = pd.DataFrame(
    [[views, videos, age]],
    columns = ['Video Views', 'Video Count', 'Age'],
)
test_values = scaler.transform(custom_channel)

predicted_subscribers = model.predict(test_values)

# Scale to an absolute count BEFORE converting to int. Truncating first
# (int(pred) * 1_000_000) discards everything after the decimal point, so 7.4
# would print as 7,000,000 and any prediction below 1.0 would print as 0.
estimated_subscribers = int(round(float(predicted_subscribers[0][0]) * 1_000_000))
print(f"Estimated Subscriber Count: {estimated_subscribers:,}")

Input Video Views: 1000000
Input Video Count: 500
Input Age of Channel: 5
Estimated Subscriber Count: 7,000,000


