In [None]:
import torch
import torch.nn as nn
import pandas as pd
import hashlib
import numpy as np
from torch.utils.data import Dataset,DataLoader
from torch.optim import Adam
from sklearn.model_selection import train_test_split

## How can we compute a hash of some data?

In [None]:
# Feed the UTF-8 bytes of a sample string into an MD5 hasher,
# then show the digest as a hexadecimal string.
hash_out = hashlib.md5()
hash_out.update("Hello, World!".encode())
hash_out.hexdigest()

Normally, a hash cannot be cracked (that is, the original data cannot be recovered from its hash). However, there are online lookup services that can recover some inputs, such as [crackstation](https://crackstation.net/).

# Create two dicts that map each valid output character (hex digit) to an index and back

In [None]:
# Hex digit -> class index (0..15), in the order the digits are listed.
dict_l_to_index = dict(zip("0123456789abcdef", range(16)))
# Class index -> hex digit: the inverse of the mapping above.
dict_i_to_l = {index: letter for letter, index in dict_l_to_index.items()}

# Create random strings from ASCII codes. Next, we are going to feed this data to our DNN model.

In [None]:
# 50,000 random strings of 10 lowercase letters, stored as ASCII codes.
# np.random.randint excludes its upper bound, so the bound must be ord("z") + 1
# (123) for the letter 'z' (122) to appear in the data.
input1 = np.random.randint(ord("a"), ord("z") + 1, (50000, 10))

# Make the hash encoded outputs from input data.

In [None]:
# For every row: turn its character codes into a string, hash the encoded
# string with MD5 and keep the hexadecimal digest.
output = [
    hashlib.md5("".join(chr(code) for code in row).encode()).hexdigest()
    for row in input1
]


# Create dataframe out of inputs

In [None]:
# One column per character position, named "_0" ... "_9".
df1 = pd.DataFrame(data=input1, columns=["_%d" % position for position in range(10)])
df1.head()

# Add the target column to the dataframe

In [None]:
# Attach the hex digests as the "target" column (one digest per input row).
df1 = df1.assign(target=output)

# We can save the dataset...

In [None]:
# Persist inputs and targets together as a single Parquet file.
df1.to_parquet(path="input_to_md5.prq")

# Create a dataset for regression model

In [None]:
class cls_data_regression(Dataset):
  """Dataset yielding ``(scaled input codes, digest character codes)`` pairs.

  ``df`` must hold the input character codes in its first ten columns and the
  hex digest string in a ``"target"`` column. Rows are addressed purely by
  position, so the frame's index labels do not matter.
  """

  def __init__(self, df) -> None:
      super().__init__()
      self.dataset = df

      print("Head of dataset: ", self.dataset.head(2))

  def __len__(self):
      return len(self.dataset)

  def __getitem__(self, index) :
      """Return the sample at position ``index``.

      Returns:
        A pair of float tensors: the first ten columns divided by 150.0, and
        the ``ord`` code of every character of the target string.
      """
      # Fetch the row once, by position, so features and target always come
      # from the same row whatever the index labels are.
      row = self.dataset.iloc[index]
      # Convert to a float array before handing it to torch, instead of
      # letting torch iterate over a pandas Series.
      features = row.iloc[0:10].to_numpy(dtype=np.float32)
      lst_output = [ord(ch) for ch in row["target"]]
      return torch.tensor(features)/150.0, torch.tensor(lst_output)/1.0


# Create dataset for classification 

In [None]:
class cls_data_classification(Dataset):
  """Dataset yielding ``(scaled input codes, flattened one-hot digest)`` pairs.

  ``df`` must hold the input character codes in its first ten columns and the
  hex digest string in a ``"target"`` column. Rows are addressed purely by
  position, so the frame's index labels do not matter.
  """

  def __init__(self, df) -> None:
      super().__init__()
      self.dataset = df

      print("Head of dataset: ", self.dataset.head(2))

  def __len__(self):
      return len(self.dataset)

  def __getitem__(self, index) :
      """Return the sample at position ``index``.

      Returns:
        A pair of float tensors: the first ten columns divided by 150.0, and
        a length-512 vector that is the flattened 32x16 one-hot encoding of
        the target string (one row per character, one column per hex digit).
      """
      # Fetch the row once, by position, so features and target always come
      # from the same row whatever the index labels are.
      row = self.dataset.iloc[index]

      t_output = torch.zeros((32,16))
      for i,l in enumerate(row["target"]):
        t_output[i,dict_l_to_index[l]] = 1.0
      t_output = t_output.reshape((-1,))

      # Convert to a float array before handing it to torch, instead of
      # letting torch iterate over a pandas Series.
      features = row.iloc[0:10].to_numpy(dtype=np.float32)
      return torch.tensor(features)/150.0, t_output


In [None]:
class cls_model_lreg(nn.Module):
  """Fully connected network mapping 10 input features to 32 outputs."""

  def __init__(self) -> None:
      super().__init__()
      # Layer order matters: it fixes both the computation and the
      # parameter names inside the Sequential container.
      layers = [
          nn.Linear(10, 512),
          nn.ReLU(),
          nn.Linear(512, 1024),
          nn.ReLU(),
          nn.Linear(1024, 64),
          nn.ReLU(),
          nn.Dropout(0.2),
          nn.Linear(64, 32),
      ]
      self.model = nn.Sequential(*layers)

  def forward(self,x_train):
    """Run ``x_train`` through the layer stack and return its raw output."""
    return self.model(x_train)

In [None]:
class cls_model_classification(nn.Module):
  """Fully connected network mapping 10 input features to 512 sigmoid outputs."""

  def __init__(self) -> None:
      super().__init__()
      # Layer order matters: it fixes both the computation and the
      # parameter names inside the Sequential container.
      hidden_stack = [
          nn.Linear(10, 2048),
          nn.ReLU(),
          nn.Linear(2048, 1024),
          nn.ReLU(),
          nn.Dropout(0.2),
          nn.Linear(1024, 512),
      ]
      self.model = nn.Sequential(*hidden_stack)

      self.sigm = nn.Sigmoid()

  def forward(self,x_train):
    """Return the sigmoid of the layer stack's output for ``x_train``."""
    raw_scores = self.model(x_train)
    return self.sigm(raw_scores)

In [None]:
# Load the saved input/digest table back from its Parquet file.
df_md5 = pd.read_parquet(path="input_to_md5.prq")

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

In [None]:
# Split the dataset reloaded from disk (df_md5), keeping 10% for testing.
# Using the reloaded frame means this part of the notebook depends only on the
# saved Parquet file rather than on the in-memory frame from the generation cells.
df_train, df_test = train_test_split(df_md5, test_size=.1)

In [None]:
# Relabel each split 0..n-1 (discarding the old labels), wrap it in the
# one-hot classification dataset and serve it in batches of 16.
ds_train = cls_data_classification(df_train.reset_index(drop=True))
ds_test = cls_data_classification(df_test.reset_index(drop=True))

trainLoader = DataLoader(dataset=ds_train, batch_size=16)
testLoader = DataLoader(dataset=ds_test, batch_size=16)


In [None]:
def f_train(net, trainLoader, valLoader, optim1, criterion1, n_epochs=2):
  """Train ``net`` on ``trainLoader`` for ``n_epochs`` passes.

  Args:
    net: model to optimise. Batches are moved to the device of its first
      parameter; if it has no parameters, batches are left where they are.
    trainLoader: iterable of ``(x, y)`` tensor batches.
    valLoader: accepted for interface compatibility; not used.
    optim1: optimizer whose ``zero_grad``/``step`` drive the updates.
    criterion1: callable ``criterion1(prediction, y)`` returning the loss.
    n_epochs: number of passes over ``trainLoader`` (default 2).

  Returns:
    None. The loss of every 100th batch is printed.
  """
  # Follow the model's device instead of assuming CUDA is present.
  first_param = next(net.parameters(), None)
  target_device = None if first_param is None else first_param.device

  # Make sure dropout and similar layers are in training mode, even if the
  # model was previously switched to eval mode.
  net.train()

  for _ in range(n_epochs):
    for it, (x, y) in enumerate(trainLoader):
      optim1.zero_grad()
      if target_device is not None:
        x, y = x.to(target_device), y.to(target_device)
      logits = net(x)

      # Use the criterion passed in, not whatever global happens to exist.
      loss = criterion1(logits, y)

      loss.backward()

      optim1.step()

      if it % 100 == 0:
        print("Loss:", loss.item())




In [None]:

def f_evaluate(net, criterion, dataloader):
    """Run ``net`` over ``dataloader`` in eval mode, printing every batch's output.

    Args:
        net: model to evaluate. Batches are moved to the device of its first
            parameter; if it has no parameters, batches are left where they are.
        criterion: accepted for interface compatibility; not used.
        dataloader: iterable of ``(x, y)`` tensor batches.

    Returns:
        The model output for the LAST batch only, or ``None`` when
        ``dataloader`` yields no batches.
    """
    net.eval()

    # Follow the model's device instead of assuming CUDA is present.
    first_param = next(net.parameters(), None)
    target_device = None if first_param is None else first_param.device

    # Stays None if the loader is empty, instead of raising on an unbound name.
    logits = None

    with torch.no_grad():
        for x, y in dataloader:
            if target_device is not None:
                x, y = x.to(target_device), y.to(target_device)
            logits = net(x)

            print("y^:", logits)

    return logits

    

In [None]:
# Build the classifier on the selected device and train it with binary
# cross-entropy (nn.MSELoss() was the commented-out alternative) and Adam.
net = cls_model_classification().to(device)

criterion = nn.BCELoss()
opti = Adam(params=net.parameters(), lr=6e-3)

f_train(net, trainLoader, testLoader, opti, criterion)

In [None]:
# Run the trained network over the test batches and keep what f_evaluate returns.
predict = f_evaluate(net=net, criterion=criterion, dataloader=testLoader)

In [None]:

# Undo the /150 scaling of the first test sample and decode it back to text.
# Round before casting: the float32 divide/multiply round trip can land just
# below the original integer code, and a bare .int() would truncate it to the
# previous character.
print("".join(chr(i) for i in (ds_test[0][0]*150).round().int().tolist()))