<a href="https://colab.research.google.com/github/Festuskipkoech/DeepLearning/blob/main/MedialPrediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (Input, Dense,Embedding, Flatten, concatenate, Dropout, BatchNormalization, GlobalAveragePooling2D)
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
# Fix the global NumPy and TensorFlow random seeds for reproducibility
np.random.seed(42)
tf.random.set_seed(42)

In [4]:
# generate synthetic data
def generate_synthetic_data(n_samples=1000, image_shape=(224, 224, 3)):
  """Build a random multimodal dataset: images, tabular features and labels.

  Every value is drawn independently from NumPy's global random state, so the
  labels have no relationship to the images or the tabular features. Seed
  NumPy (``np.random.seed``) before calling to get repeatable output.

  Args:
    n_samples: Number of samples to generate.
    image_shape: Shape of a single image. Defaults to ``(224, 224, 3)``.
      Pass something smaller for quick experiments: with the defaults the
      image array holds 1000 * 224 * 224 * 3 float64 values (about 1.2 GB).

  Returns:
    A tuple ``(images, tabular_data, labels)`` where
      * ``images`` is an array of shape ``(n_samples, *image_shape)`` with
        uniform random values in [0, 1),
      * ``tabular_data`` is a DataFrame with the columns ``age``, ``gender``,
        ``smoking_status``, ``blood_pressure``, ``cholestrol`` and ``bmi``,
      * ``labels`` is an array of ``n_samples`` values in {0, 1}, each equal
        to 1 with probability 0.3.
  """
  # dummy image data (uniform noise)
  images = np.random.rand(n_samples, *image_shape)

  # generate the data in tabular form
  # NOTE: the key 'cholestrol' (sic) is kept as-is because
  # process_tabular_data selects the column by this exact name.
  tabular_data = pd.DataFrame({
      'age': np.random.normal(60, 15, n_samples),
      'gender': np.random.choice(['M', 'F'], n_samples),
      'smoking_status': np.random.choice(['Never', 'Former', 'Current'], n_samples),
      'blood_pressure': np.random.normal(130, 20, n_samples),
      'cholestrol': np.random.normal(200, 40, n_samples),
      'bmi': np.random.normal(25, 5, n_samples)
  })

  # synthetic binary labels
  labels = np.random.binomial(1, 0.3, n_samples)
  return images, tabular_data, labels


In [7]:
# process tabular data
def process_tabular_data(df, label_encoders=None, numerical_scaler=None):
  """Label-encode the categorical columns and standardise the numerical ones.

  Called with only ``df``, new encoders and a new scaler are fitted on ``df``.
  To process validation/test data with the statistics learned on the training
  data, pass the ``label_encoders`` and ``numerical_scaler`` returned by the
  training call; they are then applied with ``transform`` and not refitted.

  Args:
    df: DataFrame containing the columns ``gender``, ``smoking_status``,
      ``age``, ``blood_pressure``, ``cholestrol`` and ``bmi``. Not modified.
    label_encoders: Optional dict mapping a categorical column name to an
      already fitted ``LabelEncoder``. A column without an entry gets a newly
      fitted encoder.
    numerical_scaler: Optional already fitted ``StandardScaler``. When omitted
      a new scaler is fitted on ``df``.

  Returns:
    A tuple ``(df_processed, label_encoders, numerical_scaler)``: the
    transformed copy of ``df``, the dict of encoders keyed by column name, and
    the scaler that was used.
  """
  df_processed = df.copy()

  categorical_cols = ['gender', 'smoking_status']
  numerical_cols = ['age', 'blood_pressure', 'cholestrol', 'bmi']

  # Work on a copy of the mapping so the caller's dict is never mutated.
  label_encoders = {} if label_encoders is None else dict(label_encoders)

  # label encode categorical variables
  for col in categorical_cols:
    encoder = label_encoders.get(col)
    if encoder is None:
      encoder = LabelEncoder()
      df_processed[col] = encoder.fit_transform(df_processed[col])
      label_encoders[col] = encoder
    else:
      # Reuse the fitted mapping so codes match those seen during fitting.
      df_processed[col] = encoder.transform(df_processed[col])

  # standardise numerical variables
  if numerical_scaler is None:
    numerical_scaler = StandardScaler()
    df_processed[numerical_cols] = numerical_scaler.fit_transform(df_processed[numerical_cols])
  else:
    # Reuse the fitted mean/scale instead of re-estimating them on this data.
    df_processed[numerical_cols] = numerical_scaler.transform(df_processed[numerical_cols])

  return df_processed, label_encoders, numerical_scaler



In [8]:
from re import DEBUG
# create the multimodal model
def create_multilmodal_model(image_shape, num_numerical_vars, categorical_cardinalities):
  """Build a Keras model that fuses an image stream with a tabular stream.

  The image stream is a ResNet50 backbone (ImageNet weights, no classification
  head) followed by global average pooling and two dense layers. The tabular
  stream embeds each categorical feature, batch-normalises the numerical
  features, and passes their concatenation through two dense layers. Both
  streams are concatenated and fed to a dense head with a single sigmoid unit.

  Args:
    image_shape: Shape of one input image, e.g. ``(224, 224, 3)``.
    num_numerical_vars: Number of numerical tabular features.
    categorical_cardinalities: Iterable with the number of distinct values of
      each categorical feature, in the order the features will be fed.

  Returns:
    An uncompiled ``Model`` whose inputs are ordered as
    ``[image, categorical_1, ..., categorical_k, numerical]`` and whose output
    is a single sigmoid activation per sample.
  """
  # image stream
  image_input = Input(shape=image_shape)
  base_model = ResNet50(weights='imagenet', include_top=False, input_tensor=image_input)

  x_img = GlobalAveragePooling2D()(base_model.output)
  x_img = Dense(512, activation='relu')(x_img)
  x_img = BatchNormalization()(x_img)
  x_img = Dense(256, activation='relu')(x_img)

  # tabular stream - categorical
  categorical_inputs = []
  categorical_embeddings = []

  for cardinality in categorical_cardinalities:
    input_cat = Input(shape=(1,))
    # Clamp to at least 1: cardinality // 2 is 0 for a single-valued feature.
    embedding_size = max(1, min(50, cardinality // 2))
    embedding = Embedding(cardinality, embedding_size)(input_cat)
    embedding = Flatten()(embedding)
    # Every Input feeding the graph must be registered so it can be passed to
    # Model(inputs=...) below; otherwise the graph is disconnected.
    categorical_inputs.append(input_cat)
    categorical_embeddings.append(embedding)

  # tabular stream - numerical
  numerical_input = Input(shape=(num_numerical_vars,))
  x_num = BatchNormalization()(numerical_input)

  # combine categorical and numerical
  tabular_features = categorical_embeddings + [x_num]
  if len(tabular_features) > 1:
    x_tab = concatenate(tabular_features)
  else:
    # No categorical features: nothing to concatenate with.
    x_tab = x_num
  x_tab = Dense(256, activation='relu')(x_tab)
  x_tab = Dense(128, activation='relu')(x_tab)

  # merge streams
  merged = concatenate([x_img, x_tab])
  x = Dense(256, activation='relu')(merged)
  x = Dropout(0.3)(x)
  x = Dense(128, activation='relu')(x)
  x = Dropout(0.3)(x)

  # output
  output = Dense(1, activation='sigmoid')(x)

  # create model
  model = Model(inputs=[image_input] + categorical_inputs + [numerical_input], outputs=output)

  return model