In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive so files
# stored on Drive can be opened through ordinary filesystem paths.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Load the IPO CSV from the mounted Drive into a DataFrame.
import pandas as pd

# Absolute Colab path; it lives under the /content/drive mount point.
file_path = '/content/drive/MyDrive/ipo_2010_2021.csv'
df = pd.read_csv(file_path)
# Preview the first five rows to confirm the file parsed as expected.
print(df.head(5))



       Date         IPO Name  \
0  29-07-21    Tatva Chintan   
1  23-07-21           Zomato   
2  19-07-21    Clean Science   
3  19-07-21        G R Infra   
4  07-07-21  India Pesticide   

                                             Profile  Issue Size (in crores)  \
0  https://www.moneycontrol.com/ipo/tatva-chintan...                  500.00   
1   https://www.moneycontrol.com/ipo/zomato_Z01.html                 9375.00   
2  https://www.moneycontrol.com/ipo/clean-science...                 1546.00   
3  https://www.moneycontrol.com/ipo/g-r-infra_I13...                  963.28   
4  https://www.moneycontrol.com/ipo/india-pestici...                  800.00   

      QIB     HNI    RII  Total   Issue  Listing Open  Listing Close  \
0    2.55    9.78  13.36   9.50  1083.0        2111.8        2310.25   
1   51.79   32.96   7.45  38.25    76.0         115.0         125.85   
2  156.37  206.43   9.00  93.41   900.0        1784.4        1585.20   
3   93.18   73.01   4.89  43.48   837.

In [None]:
# Remove the three non-feature columns (name, profile URL, date) in one call
df = df.drop(columns=['IPO Name', 'Profile', 'Date'])



In [None]:
# Scratch cell: creates an (unused) LabelEncoder and demonstrates MinMaxScaler
# on a toy array. Nothing in this cell reads or modifies `df`.
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
# Disabled: 'IPO Name' is dropped from df in an earlier cell, so this line
# would raise a KeyError if re-enabled as-is.
#df['IPO Name Encoded'] = label_encoder.fit_transform(df['IPO Name'])


import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Toy data: one feature column with five evenly spaced values
values = np.array([[10], [20], [30], [40], [50]])

# Initialize the scaler (this `scaler` name is rebound later when the real
# feature columns are scaled)
scaler = MinMaxScaler()

# Fit on the toy column and rescale it
normalized_values = scaler.fit_transform(values)

# Print the rescaled toy values
print(normalized_values)


[[0.  ]
 [0.25]
 [0.5 ]
 [0.75]
 [1.  ]]


In [None]:
# Parse "Current  Gains (%)" as a number: strip thousands separators and turn
# anything unparseable into NaN instead of raising.
df['Current  Gains (%)'] = pd.to_numeric(
    df['Current  Gains (%)'].astype(str).str.replace(',', '', regex=False),
    errors='coerce',
)

# A row without a parseable gain has no known outcome. Comparing NaN > 0 is
# False, so keeping such rows would silently label them as losses; drop them
# explicitly (and say so) before building the target.
missing_gain = df['Current  Gains (%)'].isna()
if missing_gain.any():
    print(f"Dropping {int(missing_gain.sum())} row(s) with missing 'Current  Gains (%)'")
    df = df.loc[~missing_gain].reset_index(drop=True)

# Binary target: 1 for a strictly positive current gain, 0 otherwise
df['label'] = (df['Current  Gains (%)'] > 0).astype(int)

# Display the updated DataFrame
print(df.head(10))


   Issue Size (in crores)     QIB     HNI    RII  Total   Issue  Listing Open  \
0                  500.00    2.55    9.78  13.36   9.50  1083.0        2111.8   
1                 9375.00   51.79   32.96   7.45  38.25    76.0         115.0   
2                 1546.00  156.37  206.43   9.00  93.41   900.0        1784.4   
3                  963.28   93.18   73.01   4.89  43.48   837.0        1700.0   
4                  800.00    2.64    1.36   4.40   3.25   296.0         360.0   
5                 2146.00    5.26    1.89   2.90   3.86   825.0        1009.0   
6                  521.00   84.88   73.26  11.34  45.62   428.0         550.0   
7                  909.00   62.18  172.51   5.29  56.56   306.0         380.0   
8                 5550.00    1.57    0.22   0.66   1.04   291.0         302.4   
9                 7734.99    0.00    0.00   0.00   0.00   100.0         104.0   

   Listing Close  Listing Gains(%)       CMP  Current  Gains (%)  label  
0        2310.25                 1

In [None]:
# Count missing (NaN) values in every column
print(df.isnull().sum())


# List the column names currently present in df
print(df.columns)

# Last expression of the cell: the notebook renders the first rows as a table
df.head()

Issue Size (in crores)    0
QIB                       0
HNI                       0
RII                       0
Total                     0
Issue                     0
Listing Open              0
Listing Close             0
Listing Gains(%)          0
CMP                       0
Current  Gains (%)        2
label                     0
dtype: int64
Index(['Issue Size (in crores)', 'QIB', 'HNI', 'RII', 'Total', 'Issue',
       'Listing Open', 'Listing Close', 'Listing Gains(%)', 'CMP',
       'Current  Gains (%)', 'label'],
      dtype='object')


Unnamed: 0,Issue Size (in crores),QIB,HNI,RII,Total,Issue,Listing Open,Listing Close,Listing Gains(%),CMP,Current Gains (%),label
0,500.0,2.55,9.78,13.36,9.5,1083.0,2111.8,2310.25,1,2268.5,109.46,1
1,9375.0,51.79,32.96,7.45,38.25,76.0,115.0,125.85,1,133.35,75.46,1
2,1546.0,156.37,206.43,9.0,93.41,900.0,1784.4,1585.2,1,1682.8,86.98,1
3,963.28,93.18,73.01,4.89,43.48,837.0,1700.0,1746.8,1,1730.85,106.79,1
4,800.0,2.64,1.36,4.4,3.25,296.0,360.0,335.45,1,337.4,13.99,1


In [None]:
# Make sure the size/subscription/price columns are numeric. Columns that
# pandas already parsed as numbers are left alone; text columns have their
# thousands separators removed and are cast to float. The dtype is checked
# explicitly instead of catching AttributeError from `.str`, so an unexpected
# error is not swallowed, and a value that cannot be parsed still raises.
for col in ['Issue Size (in crores)', 'QIB', 'HNI', 'RII', 'Total', 'Issue', 'Listing Open', 'Listing Close']:
    if pd.api.types.is_numeric_dtype(df[col]):
        continue
    df[col] = df[col].astype(str).str.replace(',', '', regex=False).astype(float)


# First-day outcome flag derived from the 'Issue' and 'Listing Open' columns:
# 1 when the listing opened above the issue price, 0 otherwise.
# NOTE: this overwrites whatever values 'Listing Gains(%)' held before, so
# from here on the column is a 0/1 flag, not a percentage.
df['Listing Gains(%)'] = (df['Listing Open'] > df['Issue']).astype(int)

# Show the flag next to the two columns it was computed from
print(df[['Issue', 'Listing Open', 'Listing Gains(%)']].head())


    Issue  Listing Open  Listing Gains(%)
0  1083.0        2111.8                 1
1    76.0         115.0                 1
2   900.0        1784.4                 1
3   837.0        1700.0                 1
4   296.0         360.0                 1


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Feature columns fed to the model; all of them are Min-Max scaled
columns_to_scale = ['Issue Size (in crores)', 'QIB', 'HNI', 'RII', 'Issue','Listing Open', 'Listing Close']

# Take an explicit copy of the feature columns. Writing scaled values into a
# plain `df[[...]]` selection triggers pandas' SettingWithCopyWarning because
# the selection may be a view of `df`.
numeric_columns = df[columns_to_scale].copy()

# Initialize the scaler
scaler = MinMaxScaler()

# Rescale every feature column to the [0, 1] range.
# NOTE(review): the scaler is fit on all rows, before the train/test split
# performed in a later cell, so test rows influence the min/max - confirm
# this is acceptable.
numeric_columns[columns_to_scale] = scaler.fit_transform(numeric_columns[columns_to_scale])

# Feature matrix used for training (a separate copy of the scaled columns)
X = numeric_columns.copy()

# Display the shape and first few rows of the resulting DataFrame
print(X.shape)
print(X.head())

(264, 7)
   Issue Size (in crores)       QIB       HNI       RII     Issue  \
0                0.032310  0.014419  0.010208  0.171436  0.613250   
1                0.605816  0.292847  0.034402  0.095599  0.043035   
2                0.099903  0.884196  0.215464  0.115488  0.509626   
3                0.062247  0.526887  0.076205  0.062749  0.473952   
4                0.051696  0.014928  0.001420  0.056461  0.167610   

   Listing Open  Listing Close  
0      0.774185       0.740655  
1      0.038851       0.039871  
2      0.653618       0.508049  
3      0.622537       0.559893  
4      0.129074       0.107113  


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  numeric_columns[columns_to_scale] = scaler.fit_transform(numeric_columns[columns_to_scale])


In [None]:
# Seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling are repeatable from run to run.
tf.keras.utils.set_random_seed(42)

# Target: one-hot encode the binary 'label' column (one column per class).
# Cast to float32 so the targets have the same dtype as the softmax output
# (get_dummies may return boolean columns).
y = df['label']
y = pd.get_dummies(y, prefix='label').astype('float32')

# Split the data into training and test sets (80 % / 20 %)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build the model. The input shape is declared with an explicit Input layer;
# passing `input_shape` to the first Dense layer makes Keras emit a warning.
model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(2, activation='softmax')  # One probability per class
])

# Compile the model. A softmax head with one-hot targets is paired with
# categorical cross-entropy, the loss defined for that output/target layout.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model; 20 % of the training rows are held out for validation
history = model.fit(X_train, y_train, epochs=300, batch_size=32, validation_split=0.2, verbose=2)

# Evaluate the model on the test set
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.2f}")

Epoch 1/300


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


6/6 - 1s - 197ms/step - accuracy: 0.3690 - loss: 0.6944 - val_accuracy: 0.4419 - val_loss: 0.6929
Epoch 2/300
6/6 - 0s - 29ms/step - accuracy: 0.4762 - loss: 0.6905 - val_accuracy: 0.4186 - val_loss: 0.6903
Epoch 3/300
6/6 - 0s - 10ms/step - accuracy: 0.4881 - loss: 0.6873 - val_accuracy: 0.4884 - val_loss: 0.6881
Epoch 4/300
6/6 - 0s - 8ms/step - accuracy: 0.5000 - loss: 0.6840 - val_accuracy: 0.4651 - val_loss: 0.6863
Epoch 5/300
6/6 - 0s - 9ms/step - accuracy: 0.5060 - loss: 0.6817 - val_accuracy: 0.4884 - val_loss: 0.6847
Epoch 6/300
6/6 - 0s - 10ms/step - accuracy: 0.5060 - loss: 0.6792 - val_accuracy: 0.4884 - val_loss: 0.6830
Epoch 7/300
6/6 - 0s - 9ms/step - accuracy: 0.5476 - loss: 0.6766 - val_accuracy: 0.6279 - val_loss: 0.6810
Epoch 8/300
6/6 - 0s - 9ms/step - accuracy: 0.5952 - loss: 0.6744 - val_accuracy: 0.6512 - val_loss: 0.6787
Epoch 9/300
6/6 - 0s - 14ms/step - accuracy: 0.6190 - loss: 0.6717 - val_accuracy: 0.6977 - val_loss: 0.6761
Epoch 10/300
6/6 - 0s - 9ms/step -

In [None]:
# Build a converter from the in-memory Keras model (no optimizations are
# configured here) and produce the serialized TensorFlow Lite model.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Write the converted model to model.tflite
with open("model.tflite", mode="wb") as tflite_file:
    tflite_file.write(tflite_model)

print("Model successfully converted to TensorFlow Lite format and saved as model.tflite")


Saved artifact at '/tmp/tmp3mbyqwlh'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 7), dtype=tf.float32, name='keras_tensor_44')
Output Type:
  TensorSpec(shape=(None, 2), dtype=tf.float32, name=None)
Captures:
  134563704615536: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134563705131200: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134563705129792: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134563705129264: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134563705138768: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134563705135248: TensorSpec(shape=(), dtype=tf.resource, name=None)
Model successfully converted to TensorFlow Lite format and saved as model.tflite


In [None]:
# Save the trained model to ipo.h5 (the .h5 suffix selects the HDF5 format)
model.save("ipo.h5")

# Load the file back into a separate object; a later conversion cell builds
# its TFLite converter from `reloaded_model`.
reloaded_model = tf.keras.models.load_model("ipo.h5")



In [None]:
def representative_dataset():
    """Yield calibration samples for TFLite post-training quantization.

    Reads the module-level ``X_test`` and yields at most 100 of its rows, one
    per step. Each yielded item is a single-element list holding a float32
    NumPy array of shape ``(1, n_features)``, i.e. one model input with a
    batch dimension of 1.

    Previously the whole ``X_test`` object was yielded 100 times, so every
    calibration step saw the same full batch.
    """
    # Convert once up front so each step yields a plain float32 ndarray
    calibration_rows = np.asarray(X_test, dtype=np.float32)
    for row in calibration_rows[:100]:
        yield [row.reshape(1, -1)]

# Calling the function only creates the generator; this prints the generator
# object itself and does not consume any samples.
print(representative_dataset())

<generator object representative_dataset at 0x7a628da72c70>


In [None]:
# Build the converter from the model that was reloaded from ipo.h5
converter = tf.lite.TFLiteConverter.from_keras_model(reloaded_model)  # reloaded copy, not the in-memory `model`

# Enable the default optimization set and supply calibration data
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset  # generator function defined in the previous cell

# Convert to TFLite (rebinds `tflite_model`, replacing the earlier conversion result)
tflite_model = converter.convert()

# Save the TFLite model to ipo.tflite; the interpreter cells below load this file
with open("ipo.tflite", "wb") as f:
    f.write(tflite_model)

Saved artifact at '/tmp/tmpk1vwkhzi'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 7), dtype=tf.float32, name='input_layer_8')
Output Type:
  TensorSpec(shape=(None, 2), dtype=tf.float32, name=None)
Captures:
  134563702285136: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134563700785744: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134563700792960: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134563700797888: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134563700798768: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134563700867488: TensorSpec(shape=(), dtype=tf.resource, name=None)




In [None]:
# Convert the in-memory Keras model again, this time requesting int8 as the
# supported type and as the model's input/output tensor type, calibrated with
# representative_dataset.
# NOTE(review): the result rebinds `tflite_model` but is not written to a
# .tflite file in this cell. The header-export cell later passes
# `tflite_model` to hex_to_c_array, while the interpreter cells load
# ipo.tflite from the previous conversion - confirm both are meant to refer
# to the same model.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_types = [tf.int8]
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8
converter.representative_dataset = representative_dataset
tflite_model = converter.convert()

Saved artifact at '/tmp/tmptmha85l1'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 7), dtype=tf.float32, name='keras_tensor_44')
Output Type:
  TensorSpec(shape=(None, 2), dtype=tf.float32, name=None)
Captures:
  134563704615536: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134563705131200: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134563705129792: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134563705129264: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134563705138768: TensorSpec(shape=(), dtype=tf.resource, name=None)
  134563705135248: TensorSpec(shape=(), dtype=tf.resource, name=None)




In [None]:
# Load ipo.tflite into a TFLite interpreter and allocate its tensors, which
# must happen before tensors are set or the model is invoked.
interpreter = tf.lite.Interpreter(model_path="ipo.tflite")
interpreter.allocate_tensors()

In [None]:
# Fetch the interpreter's input/output tensor descriptions and print them so
# the shape, dtype and quantization fields can be inspected.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
print('input_details:\n', input_details)
print('output_details:\n', output_details)

input_details:
 [{'name': 'serving_default_input_layer_8:0', 'index': 0, 'shape': array([1, 7], dtype=int32), 'shape_signature': array([-1,  7], dtype=int32), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]
output_details:
 [{'name': 'StatefulPartitionedCall_1:0', 'index': 12, 'shape': array([1, 2], dtype=int32), 'shape_signature': array([-1,  2], dtype=int32), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load the TFLite model stored in ipo.tflite and allocate its tensors
interpreter = tf.lite.Interpreter(model_path="ipo.tflite")
interpreter.allocate_tensors()

# Get input and output tensor details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Number of features per sample: second dimension of the input tensor shape
input_shape = int(input_details[0]['shape'][1])


def _compare_with_keras(label, sample):
    """Run one sample through the TFLite interpreter and the Keras model.

    Parameters:
    - label (str): Name used in the printed messages.
    - sample (array-like): Exactly ``input_shape`` feature values; reshaped
      to a float32 batch of shape ``(1, input_shape)``.

    Returns:
    - tuple: ``(batch, tflite_output, keras_output)``.

    Raises:
    - ValueError: If ``sample`` does not contain ``input_shape`` values.
    """
    batch = np.asarray(sample, dtype=np.float32).reshape(1, input_shape)

    interpreter.set_tensor(input_details[0]['index'], batch)
    interpreter.invoke()
    tflite_output = interpreter.get_tensor(output_details[0]['index'])
    print(f"TFLite model output for {label}:", tflite_output)

    # Same batch through the original Keras model for a side-by-side check
    keras_output = model.predict(batch)
    print(f"Keras model output for {label}:", keras_output)
    return batch, tflite_output, keras_output


# The model was trained on Min-Max scaled features, so probe it with values
# in [0, 1) rather than large integers. (The previous version of this cell
# used token-style integer inputs and pad_sequences, which truncated the
# lists to the feature count and fed the model unscaled values.)
input0_data = np.random.rand(1, input_shape)
print("Random input data:", input0_data)
input0_data, output0_data, output0_data_keras = _compare_with_keras("random input data", input0_data)

# Two real, already-scaled rows from the held-out test split
test_rows = np.asarray(X_test, dtype=np.float32)
input1_data, output1_data, output1_data_keras = _compare_with_keras("input1_data", test_rows[0])
input2_data, output2_data, output2_data_keras = _compare_with_keras("input2_data", test_rows[1])

Random input data: [[3213 1274 4633 3402  776 3346 1553]]
TFLite model output for random input data: [[0.         0.99609375]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
Keras model output for random input data: [[0. 1.]]
TFLite model output for input1_data: [[0.         0.99609375]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
Keras model output for input1_data: [[0. 1.]]
TFLite model output for input2_data: [[0.99609375 0.        ]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
Keras model output for input2_data: [[1. 0.]]


In [None]:
import time
import sys
import numpy as np
import tensorflow as tf

def hex_to_c_array(hex_data, var_name, num_epochs=None, batch_size=None, dense_sizes=None):
    """
    Render a byte sequence as the text of a C header that defines it as an array.

    The generated text contains an include guard derived from ``var_name``, a
    comment block (generation time, Python/Numpy/TensorFlow versions and any
    training metadata supplied), a ``<var_name>_len`` constant, and an
    ``alignas(8) const unsigned char`` array with twelve bytes per line.

    Parameters:
    - hex_data (bytes): The model or data to emit as array elements.
    - var_name (str): Name of the C array; upper-cased for the include guard.
    - num_epochs (int, optional): Training epochs, noted in the comment block when given.
    - batch_size (int, optional): Training batch size, noted in the comment block when given.
    - dense_sizes (list of int, optional): Layer sizes, written as DENSE<i>_SIZE lines when given.

    Returns:
    - str: The complete header file content.
    """
    guard = var_name.upper() + '_H'

    # Include guard, banner comment and tool versions
    parts = [
        '#ifndef ' + guard + '\n',
        '#define ' + guard + '\n\n',
        "/*\n CAUTION: This is an auto-generated file.\n DO NOT EDIT OR MAKE ANY CHANGES TO IT.\n",
        " Generated on: " + time.asctime(time.localtime(time.time())) + "\n\n",
        " Tools used:\n",
        f" Python: {sys.version}\n",
        f" Numpy: {np.__version__}\n",
        f" TensorFlow: {tf.__version__}\n\n",
    ]

    # Training metadata lines appear only for the arguments that were passed
    if num_epochs is not None:
        parts.append(f" Training epochs: {num_epochs}\n")
    if batch_size is not None:
        parts.append(f" Batch size: {batch_size}\n")
    if dense_sizes is not None:
        parts.extend(
            f" DENSE{layer_no}_SIZE = {size};\n"
            for layer_no, size in enumerate(dense_sizes, start=1)
        )
    parts.append("*/\n\n")

    # Length constant first, then the opening of the aligned array
    total = len(hex_data)
    parts.append(f"const unsigned int {var_name}_len = {total};\n\n")
    parts.append(f"alignas(8) const unsigned char {var_name}[] = {{\n")

    # One 0x?? token per byte: comma after all but the last, newline after every 12th
    for position, byte in enumerate(hex_data, start=1):
        token = format(byte, '#04x')
        if position < total:
            token += ','
        if position % 12 == 0:
            token += '\n'
        parts.append(token)

    # Close the array and the include guard
    parts.append("\n};\n\n")
    parts.append(f'#endif // {guard}\n')

    return ''.join(parts)

In [None]:
# Write TFLite model to a C source (or header) file
with open("ipo" + '.h', 'w') as file:
  file.write(hex_to_c_array(tflite_model, "ipo"))

In [None]:
# Print the Keras model's layer-by-layer summary (layers, output shapes, parameter counts)
model.summary()