In [32]:
import numpy as np
import torch 
import matplotlib.pyplot as plt
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler

df = pd.read_csv('daily_csv.csv')
df.dropna(inplace=True)
y = df['Price'].values.reshape(-1,1)
sc = MinMaxScaler()
y = sc.fit_transform(y).flatten()
x = np.arange(1,len(y),1)
seq_len = 10
X,Y = [],[]
for i in range(0,5000):
    ls = []
    for j in range(i, i + seq_len):
        ls.append(y[j])
    X.append(ls)
    Y.append(y[j + 1])
X,Y = np.array(X),np.array(Y)
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size = 0.1, random_state = 42, shuffle = False, stratify = None)

class custom_dataset(Dataset):
    def __init__(self, x, y):
        self.x = torch.tensor(x, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)
        self.len = x.shape[0]

    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]

    def __len__(self):
        return self.len

data = custom_dataset(x_train, y_train)
train_loader = DataLoader(data, shuffle = True, batch_size = 256)
device = "cuda" if torch.cuda.is_available() else "cpu"
class RNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.rnn = nn.RNN(input_size = 1, hidden_size = 5, num_layers = 1, batch_first = True)
        self.fc1 = nn.Linear(in_features = 5, out_features = 1)

    def forward(self,x ):
        output, _status = self.rnn(x)
        output = output[:,-1,:]
        output = self.fc1(torch.relu(output))
        return output

model = RNN().to(device)
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(),lr=0.001)
losses=[]
for epoch in range(10):
    for i,(X,y) in enumerate(train_loader):
        X,y = X.to(device),y.to(device)
        X = X.view(-1,seq_len,1)
        y_pred = model(X)
        loss = loss_fn(y_pred,y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f"Epoch : {epoch} | Train loss: {loss.item()/len(train_loader)}")


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch : 0 | Train loss: 0.01829891072379218
Epoch : 1 | Train loss: 0.015761627091301814
Epoch : 2 | Train loss: 0.010942712426185608
Epoch : 3 | Train loss: 0.009428432418240441
Epoch : 4 | Train loss: 0.007589219344986809
Epoch : 5 | Train loss: 0.006158503807253308
Epoch : 6 | Train loss: 0.005209334194660187
Epoch : 7 | Train loss: 0.004767655498451657
Epoch : 8 | Train loss: 0.004312836461597019
Epoch : 9 | Train loss: 0.0034642256796360016


In [13]:
import os 
import glob 
import torch 
from torch import nn 
from torch.utils.data import Dataset,DataLoader
from pathlib import Path 

all_names,labels = [],[]
max_len_name = 0
base_dir = Path('data/dataset/names')
# print(base_dir)
file_paths = list(base_dir.glob('*'))
# print(file_paths)
categories = [file_path.stem for file_path in file_paths]
# print(categories)
num_classes = len(categories)
# print(num_classes)

for file_path in file_paths:
    with open(file_path,'r',encoding='utf-8') as f:
        names = f.read().split('\n')
        for name in names:
            if len(name) > max_len_name:
                max_len_name = len(name)
# print(max_len_name)

for i,file_path in enumerate(file_paths):
    nationality_index = categories.index(file_path.stem)
    with open(file_path,'r',encoding='utf-8') as f:
        names = f.read().split('\n')
        for name in names:
            num_list = [ord(k) for k in name] + [0]*(max_len_name - len(name))
            all_names.append(num_list)
            labels.append(nationality_index)
            
class Data(Dataset):
    def __init__(self,X,y):
        super().__init__()
        self.X = torch.tensor(X,dtype=torch.float32)
        self.y = torch.tensor(y,dtype=torch.long)

    def __getitem__(self, index):
        return self.X[index],self.y[index]
    
    def __len__(self):
        return len(self.X)
    
class RNN_Model(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes) -> None:
        super().__init__()
        self.rnn = nn.RNN(input_size=input_size,hidden_size=hidden_size,num_layers=1,batch_first=True)
        self.fc1 = nn.Linear(in_features=hidden_size,out_features=num_classes)
    
    def forward(self,X):
        output, _ = self.rnn(X)
        output = output[:,-1,:]
        output = self.fc1(output)
        return output

data = Data(all_names,labels)
data_loader = DataLoader(data, batch_size=32, shuffle=True)
device = "cuda" if torch.cuda.is_available() else "cpu"
model = RNN_Model(input_size=1,hidden_size=12,num_classes=num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),lr=0.005)

epochs = 5
train_loss = 0
for epoch in range(epochs):
    for batch,(X,y) in enumerate(data_loader):
        X,y = X.to(device),y.to(device)
        X = X.view(-1,max_len_name,1)
        y_pred = model(X)
        optimizer.zero_grad()
        loss = criterion(y_pred,y)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    train_loss /= len(data_loader)
    print(f"Epochs {epoch + 1} | Loss: {train_loss}")

def predict(model,name,max_len_name,categories,device):
    name_ascii = [ord(char) for char in name] + [0] * (max_len_name - len(name))
    name_tensor = torch.tensor([name_ascii],dtype=torch.float32).to(device)
    name_tensor = name_tensor.view(-1,max_len_name,1)
    model.eval()
    with torch.inference_mode():
        output = model(name_tensor)
        _,predicted_index = torch.max(output,1)
        nationality = categories[predicted_index]
    return nationality

sample = 'alex'
sample_out = predict(model,sample,max_len_name,categories,device)
print(f"predicted nationality of {sample} is {sample_out}") 

Epochs 1 | Loss: 1.8706881320400603
Epochs 2 | Loss: 1.8539345865093855
Epochs 3 | Loss: 1.8542086849364254
Epochs 4 | Loss: 1.8545676530182225
Epochs 5 | Loss: 1.8543791539446277
predicted nationality of alex is Russian


In [37]:
import os 
import glob 
import torch 
from torch import nn 
from torch.utils.data import Dataset,DataLoader
from pathlib import Path 
base_dir = Path('data/dataset/names')
print(base_dir)
file_paths = list(base_dir.glob('*'))
print(file_paths)
all_chars = set()
for file_path in file_paths:
    with open(file_path,'r',encoding='utf-8') as f:
        names = f.read().split('\n')
        for name in names:
            all_chars.update(name)

all_chars = sorted(list(all_chars))
num_chars = len(all_chars)
char_to_idx = {ch:i for i,ch in enumerate(all_chars)}
index_to_char = {i:ch for i,ch in enumerate(all_chars)}

max_sequence_length = 10
sequences,next_chars = [],[]

for file_path in file_paths:
    with open(file_path,'r',encoding='utf-8') as f:
        names = f.read().split('\n')
        for name in names:
            for i in range(len(name) - 1):
                start_index = max(0,i + 1 - max_sequence_length)
                end_index = i + 1
                sequence = [char_to_idx[ch] for ch in name[start_index:end_index]]
                sequence += [0]*(max_sequence_length - len(sequence))
                sequences.append(sequence)
                next_chars.append(char_to_idx[name[i + 1]])

class Data(Dataset):
    def __init__(self,sequences,next_chars):
        super().__init__()
        self.sequences = torch.tensor(sequences,dtype=torch.long)
        self.next_chars = torch.tensor(next_chars,dtype=torch.long)
    
    def __getitem__(self, index):
        return self.sequences[index],self.next_chars[index]
    
    def __len__(self):
        return len(self.sequences)

class CharModel(nn.Module):
    def __init__(self, num_chars, hidden_size):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=num_chars,embedding_dim=hidden_size)
        self.rnn = nn.RNN(input_size=hidden_size,hidden_size=hidden_size,batch_first=True)
        self.fc = nn.Linear(in_features=hidden_size,out_features=num_chars)
    
    def forward(self,x):
        x = self.embedding(x)
        output,_ = self.rnn(x)
        output = output[:,-1,:]
        output = self.fc(output)
        return output

dataset = Data(sequences,next_chars)
data_loader = DataLoader(dataset,batch_size=32,shuffle=True)
device = "cuda" if torch.cuda.is_available() else "cpu"
model = CharModel(num_chars=num_chars,hidden_size=100).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),lr=0.005)
epochs = 5
train_loss = 0 
for epoch in range(epochs):
    for batch,(seq,next_char) in enumerate(data_loader):
        seq,next_char = seq.to(device),next_char.to(device)
        output = model(seq)
        optimizer.zero_grad()
        loss = criterion(output,next_char)
        loss.backward()
        optimizer.step()
    train_loss += loss.item()
    train_loss /= len(data_loader)
    print(f"Epoch {epoch + 1} | Loss: {train_loss}")

def predict(model, char_to_idx, idx_to_char, sequence, hidden_size=100, device='cpu'):
    sequence = sequence[-max_sequence_length:]
    sequence_idx = [char_to_idx.get(ch, 0) for ch in sequence]
    sequence_idx += [0] * (max_sequence_length - len(sequence_idx))
    input_tensor = torch.tensor([sequence_idx], dtype=torch.long).to(device)  # Corrected line
    model.eval()
    with torch.no_grad():  # Updated to use torch.no_grad() for consistency
        input_tensor = input_tensor.to(device)  # Ensure tensor is on the correct device
        pred = model(input_tensor)
        pred_idx = pred.argmax(dim=1).item()
    next_char = idx_to_char[pred_idx]
    return next_char

# Example usage, ensuring the device is correctly used
pred_char = predict(model, char_to_idx, index_to_char, 'hello', device=device)
print(f"Predicted character: {pred_char}")


data\dataset\names
[WindowsPath('data/dataset/names/Arabic.txt'), WindowsPath('data/dataset/names/Chinese.txt'), WindowsPath('data/dataset/names/Czech.txt'), WindowsPath('data/dataset/names/Dutch.txt'), WindowsPath('data/dataset/names/English.txt'), WindowsPath('data/dataset/names/French.txt'), WindowsPath('data/dataset/names/German.txt'), WindowsPath('data/dataset/names/Greek.txt'), WindowsPath('data/dataset/names/Irish.txt'), WindowsPath('data/dataset/names/Italian.txt'), WindowsPath('data/dataset/names/Japanese.txt'), WindowsPath('data/dataset/names/Korean.txt'), WindowsPath('data/dataset/names/Polish.txt'), WindowsPath('data/dataset/names/Portuguese.txt'), WindowsPath('data/dataset/names/Russian.txt'), WindowsPath('data/dataset/names/Scottish.txt'), WindowsPath('data/dataset/names/Spanish.txt'), WindowsPath('data/dataset/names/Vietnamese.txt')]
Epoch 1 | Loss: 0.0007172762745558613
Epoch 2 | Loss: 0.000622891075266391
Epoch 3 | Loss: 0.0006781061332621962
Epoch 4 | Loss: 0.00060884

In [18]:
import glob 
import os 
import torch 
from torch import nn 
from torch.utils.data import Dataset,DataLoader
from pathlib import Path

base_dir = Path('data/dataset/names')
print(base_dir)
file_paths = list(base_dir.glob('*'))
print(file_paths)
all_names,labels = [],[]
max_len = 0
categories = [file_path.stem for file_path in file_paths]
print(categories)
num_classes = len(categories)
print(num_classes)
for file_path in file_paths:
    with open(file_path,'r',encoding='utf-8') as f:
        names = f.read().split('\n')
        for name in names:
            if len(name) > max_len:
                max_len = len(name)
print(max_len)

for i,file_path in enumerate(file_paths):
    category_index = categories.index(file_path.stem)
    with open(file_path,'r',encoding='utf-8') as f:
        names = f.read().split('\n')
        for name in names:
            num_list = [ord(char) for char in name] + [0]*(max_len - len(name))
            all_names.append(num_list)
            labels.append(category_index)
print(len(all_names),len(labels))

class Data(Dataset):
    def __init__(self,X,y):
        super().__init__()
        self.X = torch.tensor(X,dtype=torch.float32)
        self.y = torch.tensor(y,dtype=torch.long)
    
    def __getitem__(self, index):
        return self.X[index],self.y[index]
    
    def __len__(self):
        return len(self.X)

class RNN_Char(nn.Module):
    def __init__(self, input_size,hidden_size,output_size):
        super().__init__()
        self.rnn = nn.RNN(input_size=input_size,hidden_size=hidden_size,num_layers=1,batch_first=True)
        self.fc = nn.Linear(in_features=hidden_size,out_features=output_size)
    
    def forward(self,X):
        output,_ = self.rnn(X)
        output = output[:,-1,:]
        output = self.fc(output)
        return output

dataset = Data(all_names,labels)
data_loader = DataLoader(dataset=dataset,batch_size=32,shuffle=True)
device = "cuda" if torch.cuda.is_available() else "cpu"
model = RNN_Char(input_size=1,hidden_size=10,output_size=num_classes).to(device)
model 
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),lr=0.005)
epochs = 5
train_loss = 0
for epoch in range(epochs):
    for batch,(X,y) in enumerate(data_loader):
        X,y = X.to(device),y.to(device)
        X = X.view(-1,max_len,1)
        y_pred = model(X)
        loss = loss_fn(y_pred,y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    train_loss /= len(data_loader)
    print(f"Epoch: {epoch + 1} | Train loss:{train_loss}")
    
def predict(model,categories,name,max_len,device):
    name_ascii = [ord(char) for char in name] + [0] * (max_len - len(name))
    name_tensor = torch.tensor([name_ascii],dtype=torch.float32)
    name_tensor = name_tensor.view(-1,max_len,1).to(device)
    with torch.no_grad():
        y_pred = model(name_tensor)
        pred_idx = y_pred.argmax(dim=1)
    nationality_index = categories[pred_idx.item()]
    return nationality_index        

sample_input = 'Alexander'
sample_out = predict(model, categories, sample_input, max_len, device)
print(f"The predicted nationality for {sample_input} is {sample_out}.")



data\dataset\names
[WindowsPath('data/dataset/names/Arabic.txt'), WindowsPath('data/dataset/names/Chinese.txt'), WindowsPath('data/dataset/names/Czech.txt'), WindowsPath('data/dataset/names/Dutch.txt'), WindowsPath('data/dataset/names/English.txt'), WindowsPath('data/dataset/names/French.txt'), WindowsPath('data/dataset/names/German.txt'), WindowsPath('data/dataset/names/Greek.txt'), WindowsPath('data/dataset/names/Irish.txt'), WindowsPath('data/dataset/names/Italian.txt'), WindowsPath('data/dataset/names/Japanese.txt'), WindowsPath('data/dataset/names/Korean.txt'), WindowsPath('data/dataset/names/Polish.txt'), WindowsPath('data/dataset/names/Portuguese.txt'), WindowsPath('data/dataset/names/Russian.txt'), WindowsPath('data/dataset/names/Scottish.txt'), WindowsPath('data/dataset/names/Spanish.txt'), WindowsPath('data/dataset/names/Vietnamese.txt')]
['Arabic', 'Chinese', 'Czech', 'Dutch', 'English', 'French', 'German', 'Greek', 'Irish', 'Italian', 'Japanese', 'Korean', 'Polish', 'Portug

In [25]:
import glob 
import os 
import torch 
from torch import nn 
from torch.utils.data import Dataset,DataLoader
from pathlib import Path

all_chars = set()
next_chars,sequences = [],[]
max_seq_len = 10
base_dir = Path('data/dataset/names')
file_paths = list(base_dir.glob('*'))
for file_path in file_paths:
    with open(file_path,'r',encoding='utf-8') as f:
        names = f.read().split('\n')
        for name in names:
            all_chars.update(name)
all_chars = sorted(list(all_chars))
num_chars = len(all_chars)
char_to_idx = {ch:i for i,ch in enumerate(all_chars)}
index_to_char = {i:ch for i,ch in enumerate(all_chars)}

for file_path in file_paths:
    with open(file_path,'r',encoding='utf-8') as f:
        names = f.read().split('\n')
        for name in names:
            for i in range(len(name) - 1):
                start_index = max(0,i+1-max_seq_len)
                end_index = i + 1
                sequence = [char_to_idx[ch] for ch in name[start_index:end_index]]
                sequence += [0]*(max_seq_len - len(sequence))
                sequences.append(sequence)
                next_chars.append(char_to_idx[name[i + 1]])

class Data(Dataset):
    def __init__(self,sequences,next_chars):
        self.sequences = torch.tensor(sequences,dtype=torch.long)
        self.next_chars = torch.tensor(next_chars,dtype=torch.long)
    
    def __getitem__(self, index):
        return self.sequences[index],self.next_chars[index]
    
    def __len__(self):
        return len(self.sequences)
    
dataset = Data(sequences,next_chars)
data_loader = DataLoader(dataset=dataset,batch_size=32,shuffle=True)
device = "cuda" if torch.cuda.is_available() else "cpu"

class RNN_Model(nn.Module):
    def __init__(self, num_chars,hidden_size):
        super().__init__()
        self.embeddings = nn.Embedding(num_embeddings=num_chars,embedding_dim=hidden_size)
        self.rnn = nn.RNN(input_size=hidden_size,hidden_size=hidden_size,num_layers=1,batch_first=True)
        self.fc = nn.Linear(in_features=hidden_size,out_features=num_chars)
        
    def forward(self,x):
        x = self.embeddings(x)
        output,_ = self.rnn(x)
        output = output[:,-1,:]
        output = self.fc(output)
        return output
    
criterion = nn.CrossEntropyLoss()
model = RNN_Model(num_chars,hidden_size=100).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
# Training loop
epochs = 5
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for seq, next_char in data_loader:
        seq, next_char = seq.to(device), next_char.to(device)
        optimizer.zero_grad()
        output = model(seq)
        loss = criterion(output, next_char)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    
    avg_loss = total_loss / len(data_loader)
    print(f'Epoch {epoch + 1}/{epochs}, Loss: {avg_loss:.4f}')


Epoch 1/5, Loss: 2.9051
Epoch 2/5, Loss: 2.6333
Epoch 3/5, Loss: 2.6053
Epoch 4/5, Loss: 2.6094
Epoch 5/5, Loss: 2.6176
