<a href="https://colab.research.google.com/github/43pratik/DL_Lab/blob/main/DL_P5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

# CSV that is read into `df`; later cells use its Gene, Sample, FPKM and tissue columns.
data_path = '/content/process_data.csv'

try:
    df = pd.read_csv(data_path)
except FileNotFoundError:
    print(f"Error: The file was not found at {data_path}")
    # Re-raise so later cells never run against a missing or stale `df`.
    raise
except Exception as e:
    print(f"An error occurred while loading the data: {e}")
    # Same reasoning: report the problem, then stop the notebook here.
    raise
else:
    # The preview runs outside the `try`, so a rendering problem is not
    # misreported as a data-loading failure.
    print("Data loaded successfully!")
    display(df.head())

Data loaded successfully!


Unnamed: 0,Gene,Sample,FPKM,description,tissue,metastasis
0,TSPAN6,CA.102548,0.93,CA.102548,breast tumor,yes
1,TNMD,CA.102548,0.0,CA.102548,breast tumor,yes
2,DPM1,CA.102548,0.0,CA.102548,breast tumor,yes
3,SCYL3,CA.102548,5.78,CA.102548,breast tumor,yes
4,C1orf112,CA.102548,2.83,CA.102548,breast tumor,yes


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder


# Turn the long-format table in `df` into model-ready arrays:
# one row per sample, one column per gene, FPKM as the cell value.
try:
    df_pivot = df.pivot(index='Sample', columns='Gene', values='FPKM')

    # Attach each sample's tissue label; the assignment aligns on the 'Sample' index.
    sample_tissue = df[['Sample', 'tissue']].drop_duplicates().set_index('Sample')['tissue']
    df_pivot['tissue'] = sample_tissue

    # Discard samples that have a missing value in any gene column or in the label.
    df_pivot = df_pivot.dropna()

    X_seq = df_pivot.drop('tissue', axis=1).values
    y_seq = df_pivot['tissue'].values

    # Map tissue names to integer class ids; `le` is kept so predictions can be decoded later.
    le = LabelEncoder()
    y_seq_encoded = le.fit_transform(y_seq)

    # (samples, genes) -> (samples, genes, 1): each gene becomes one step with a single feature.
    X_seq = X_seq.reshape(X_seq.shape[0], X_seq.shape[1], 1)

    X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq_encoded, test_size=0.2, random_state=42)

    print("Data prepared for RNN.")

except Exception as e:
    print("Could not reshape data for RNN. A standard classification model might be more suitable for this data structure.")
    print(f"Error: {e}")
    # Re-raise: without these arrays the model cells below would fail with a
    # confusing NameError or silently reuse values from an earlier run.
    raise

Data prepared for RNN.


In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN, Dropout

# Seed Python, NumPy and TensorFlow so weight initialisation and dropout masks
# are repeatable when the notebook is re-run (same seed as the train/test split).
tf.keras.utils.set_random_seed(42)

# Binary classifier: a single SimpleRNN layer followed by dropout and a sigmoid unit.
model = Sequential()
# Declare the input with an explicit Input object instead of passing
# `input_shape` to the first layer, which Keras warns against.
model.add(tf.keras.Input(shape=(X_train.shape[1], 1)))
model.add(SimpleRNN(units=50, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(units=1, activation='sigmoid'))

# Sigmoid output + binary cross-entropy matches the 0/1 labels from the label encoder.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model.summary()

  super().__init__(**kwargs)


In [None]:
# Train for 10 epochs in batches of 32, holding out 20% of the training data
# for validation; the returned object is kept in `history`.
history = model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=32,
    validation_split=0.2,
)

Epoch 1/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step - accuracy: 0.5938 - loss: 2.2978 - val_accuracy: 0.6250 - val_loss: 1.2368
Epoch 2/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - accuracy: 0.5938 - loss: 1.5421 - val_accuracy: 0.6250 - val_loss: 0.8616
Epoch 3/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - accuracy: 0.7188 - loss: 1.0543 - val_accuracy: 0.6250 - val_loss: 0.6490
Epoch 4/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - accuracy: 0.4375 - loss: 1.2244 - val_accuracy: 0.6250 - val_loss: 0.6561
Epoch 5/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - accuracy: 0.4375 - loss: 1.2522 - val_accuracy: 0.5000 - val_loss: 0.7216
Epoch 6/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - accuracy: 0.6562 - loss: 0.7950 - val_accuracy: 0.5000 - val_loss: 0.7230
Epoch 7/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

In [None]:
# Score the trained model on the held-out test split; the result unpacks
# into the loss followed by the accuracy metric.
test_metrics = model.evaluate(X_test, y_test)
loss, accuracy = test_metrics

print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 720ms/step - accuracy: 0.8182 - loss: 0.4025
Test Loss: 0.4025
Test Accuracy: 0.8182


In [None]:
# Compute the hard 0/1 predictions here so this cell does not depend on a
# variable that is only assigned further down the notebook.
predicted_classes = (model.predict(X_test) > 0.5).astype(int)

# Decode the integer class ids back to the original tissue names.
predicted_tissue_labels = le.inverse_transform(predicted_classes.flatten())

print("Sample Tissue Predictions (first 10):")
print(predicted_tissue_labels[:10])

Sample Tissue Predictions (first 10):
['normal breast tissue' 'normal breast tissue' 'breast tumor'
 'breast tumor' 'breast tumor' 'breast tumor' 'breast tumor'
 'normal breast tissue' 'breast tumor' 'normal breast tissue']


In [None]:
# Raw sigmoid outputs of the model for every test sample.
predictions = model.predict(X_test)

# Threshold at 0.5 to obtain hard 0/1 class ids.
predicted_classes = (predictions > 0.5).astype(int)

# Header and the first ten predictions, each on its own line.
print("Sample Predictions (first 10):", predicted_classes[:10], sep="\n")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 539ms/step
Sample Predictions (first 10):
[[1]
 [1]
 [0]
 [0]
 [0]
 [0]
 [0]
 [1]
 [0]
 [1]]
