In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy

# NOTE: a stray C++-style `import std::string` line used to sit here. It is not
# valid Python and made the whole cell fail with a SyntaxError, so it was removed.

# Output alphabet. Index 0 ('@') is the class that `nn.CTCLoss(blank=0)` treats
# as the CTC blank.
# NOTE(review): the two trailing spaces give the alphabet 41 entries, which
# matches the 41-unit output layer of `MyModel` -- presumably intentional; confirm.
vocab = list("@abcdefghijklmnopqrstuvwxyz'?!123456789  ")

# Character -> class index. ' ' occurs twice in `vocab`; the later occurrence
# wins, so ' ' encodes to index 40 and index 39 is never produced by encoding.
char_to_num = {char: num for num, char in enumerate(vocab)}
# Class index -> character (indices 39 and 40 both decode to ' ').
num_to_char = {num: char for num, char in enumerate(vocab)}

class MyModel(nn.Module):
    """3D-convolutional front end, two bidirectional LSTMs and a 41-way per-step classifier.

    Attribute names are kept exactly as they were so that existing checkpoints
    (state-dict keys such as ``conv1.weight`` or ``bidirectional_lstm1.*``)
    still load.

    Shape notes:
        * Every pooling layer uses kernel ``(1, 2, 2)``: height and width are
          halved three times while the frame (depth) axis is left untouched.
        * The first LSTM expects 6375 features per time step, i.e.
          ``75 channels * 5 * 17``. That is what 46x140 input frames produce
          (46 -> 23 -> 11 -> 5 and 140 -> 70 -> 35 -> 17); other frame sizes
          will not match the LSTM input size.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor (padding=1 keeps the spatial size;
        # only the pooling layers shrink height/width).
        self.conv1 = nn.Conv3d(1, 128, kernel_size=3, padding=1)
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool3d(kernel_size=(1, 2, 2))
        self.conv2 = nn.Conv3d(128, 256, kernel_size=3, padding=1)
        self.relu2 = nn.ReLU()
        self.maxpool2 = nn.MaxPool3d(kernel_size=(1, 2, 2))
        self.conv3 = nn.Conv3d(256, 75, kernel_size=3, padding=1)
        self.relu3 = nn.ReLU()
        self.maxpool3 = nn.MaxPool3d(kernel_size=(1, 2, 2))
        # Collapses (channels, height, width) into one feature vector per frame.
        self.flatten = nn.Flatten(start_dim=2)
        # Sequence model: 2 x 128 hidden units per direction -> 256 features.
        self.bidirectional_lstm1 = nn.LSTM(6375, 128, bidirectional=True, batch_first=True)
        self.dropout1 = nn.Dropout(0.5)
        self.bidirectional_lstm2 = nn.LSTM(256, 128, bidirectional=True, batch_first=True)
        self.dropout2 = nn.Dropout(0.5)
        # Per-time-step classifier over the 41 output classes.
        self.dense = nn.Linear(256, 41)

    def forward(self, x):
        """Compute per-frame class probabilities.

        Args:
            x: float tensor of shape ``(batch, 1, frames, height, width)``
               (``Conv3d`` layout: channels first, frames as the depth axis).

        Returns:
            Tensor of shape ``(batch, frames, 41)`` whose last axis is a
            probability distribution (sums to 1). ``nn.CTCLoss`` needs
            log-probabilities in ``(frames, batch, classes)`` order, so take the
            log and permute before computing that loss.
        """
        x = self.maxpool1(self.relu1(self.conv1(x)))
        x = self.maxpool2(self.relu2(self.conv2(x)))
        x = self.maxpool3(self.relu3(self.conv3(x)))
        # (batch, channels, frames, h, w) -> (batch, frames, channels, h, w) so
        # that flattening yields one feature vector per frame.
        x = x.permute(0, 2, 1, 3, 4)
        x = self.flatten(x)
        x, _ = self.bidirectional_lstm1(x)
        x = self.dropout1(x)
        x, _ = self.bidirectional_lstm2(x)
        x = self.dropout2(x)
        # Normalise over the class axis. The previous dim=1 normalised over the
        # time axis, so the per-frame outputs were not class distributions.
        return F.softmax(self.dense(x), dim=-1)

def ctc_loss(preds, targets, input_lengths, target_lengths):
    """Return the CTC loss of ``preds`` against ``targets`` with class 0 as blank.

    All four arguments are handed to ``nn.CTCLoss`` unchanged, so they must
    already be in the form that criterion documents: ``preds`` as
    log-probabilities laid out ``(time, batch, classes)``, plus the target
    sequences and the per-sample input/target lengths.
    """
    criterion = nn.CTCLoss(blank=0)
    return criterion(preds, targets, input_lengths, target_lengths)

def train_epoch(model, dataloader, optimizer):
    """Train ``model`` for one pass over ``dataloader`` and return the mean batch loss.

    Args:
        model: network whose forward pass returns per-step class probabilities
            shaped ``(batch, time, classes)`` (as ``MyModel`` does).
        dataloader: sized iterable yielding
            ``(inputs, targets, input_lengths, target_lengths)`` batches.
        optimizer: optimizer over ``model``'s parameters.

    Returns:
        Sum of the per-batch losses divided by ``len(dataloader)``.

    Raises:
        ZeroDivisionError: if ``dataloader`` has length 0.
    """
    model.train()
    total_loss = 0

    for inputs, targets, input_lengths, target_lengths in dataloader:
        optimizer.zero_grad()

        outputs = model(inputs)
        # nn.CTCLoss needs log-probabilities in (time, batch, classes) order,
        # whereas the model returns probabilities in (batch, time, classes).
        # The clamp keeps log() finite when a probability underflows to 0.
        log_probs = torch.log(outputs.clamp_min(1e-8)).permute(1, 0, 2)
        loss = ctc_loss(log_probs, targets, input_lengths, target_lengths)

        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    return total_loss / len(dataloader)

# Example usage:

# Build the network and restore previously saved weights.
# `map_location='cpu'` lets a checkpoint that was saved from a GPU load on a
# CPU-only machine; the model is never moved off the CPU in this cell.
# NOTE: torch.load unpickles the file -- only load checkpoints you trust.
model = MyModel()
model.load_state_dict(torch.load('model.pth', map_location='cpu'))
# Adam over all model parameters, and the number of passes over the data.
optimizer = optim.Adam(model.parameters(), lr=0.01)
num_epochs = 1

def load_alignments(path: str, mapping=None):
    """Read an alignment file and encode its transcript as class indices.

    The third whitespace-separated field of each line is taken as a word; the
    silence marker ``'sil'`` is dropped, the remaining words are joined with
    single spaces and every character is looked up in ``mapping``.

    Args:
        path: path of the alignment file.
        mapping: character -> index dictionary. Defaults to the module-level
            ``char_to_num`` table.

    Returns:
        1-D ``torch.long`` tensor of class indices (empty if the file has no
        non-silence words).

    Raises:
        KeyError: if the transcript contains a character missing from ``mapping``.
    """
    if mapping is None:
        mapping = char_to_num

    words = []
    with open(path, 'r') as f:
        for line in f:
            fields = line.split()
            # Blank or truncated lines carry no word; skip them instead of
            # failing on fields[2].
            if len(fields) >= 3 and fields[2] != 'sil':
                words.append(fields[2])

    transcript = ' '.join(words)
    # torch.tensor needs a concrete sequence (a generator is rejected).
    return torch.tensor([mapping[ch] for ch in transcript], dtype=torch.long)


# Training loop: run `num_epochs` passes over the data and print the mean loss of each.
# NOTE(review): `train_dataloader` is not defined in any cell visible here. It must be
# created beforehand and yield (inputs, targets, input_lengths, target_lengths)
# batches, which is how `train_epoch` unpacks each item.
for epoch in range(num_epochs):
    train_loss = train_epoch(model, train_dataloader, optimizer)
    # Epochs are reported 1-based.
    print(f'Epoch {epoch + 1}, Loss: {train_loss}')

# Remember to adjust hyperparameters and data loading according to your specific dataset and requirements.


In [6]:
import torch
import numpy as np

# Expected shape of the dump.
# NOTE(review): presumably (videos, frames, height, width, channels) -- the
# 46 x 140 frame size matches the crop used by `load_video`; confirm against
# whatever wrote the file.
dim1 = 10
dim2 = 75
dim3 = 46
dim4 = 140
dim5 = 1

dimensions = (dim1, dim2, dim3, dim4, dim5)
total_size = int(np.prod(dimensions))
print(total_size)

# Read the raw bytes first so the element type can be chosen from the file
# size. np.fromfile returns a writable array, unlike np.frombuffer over bytes,
# which hands torch.from_numpy a read-only buffer.
raw_bytes = np.fromfile("data10.bin", dtype=np.uint8)

# float64 is tried first (the original assumption). float32 is accepted as a
# fallback because the writer's element type is not shown in this notebook.
for candidate_dtype in (np.float64, np.float32):
    if raw_bytes.nbytes == total_size * np.dtype(candidate_dtype).itemsize:
        data = torch.from_numpy(raw_bytes.view(candidate_dtype))
        break
else:
    raise ValueError(
        "Error: Data size does not match expected array size. "
        f"File holds {raw_bytes.nbytes} bytes; expected {total_size * 8} (float64) "
        f"or {total_size * 4} (float32) for shape {dimensions}."
    )

# Reshape the flat data into a 5D tensor (no copy).
tensor5D = data.view(dimensions)

# Now tensor5D is a 5D tensor with the data from the binary file
print(tensor5D[0][0][0][0][0])  # Accessing a specific element


4830000


ValueError: Error: Data size does not match expected array size.

In [None]:


    // Reads the text file at `filePath` and returns its lines in file order,
    // without their line terminators. If the file cannot be opened, a message
    // is written to stderr and an empty vector is returned.
    vector<std::string> readLinesFromFile(const std::string& filePath) {
        vector<std::string> result;
        ifstream input(filePath);

        // Guard clause: nothing to read when the stream failed to open.
        if (!input.is_open()) {
            cerr << "Unable to open file" << endl;
            return result;
        }

        for (std::string current; getline(input, current); ) {
            result.push_back(current);
        }
        input.close();
        return result;
    }

    // Loads the video at `path` and returns its frames as a float32 CPU tensor
    // of shape (frames, 46, 140).
    //
    // Each frame is converted to grayscale, cropped to the 140x46 window whose
    // top-left corner is (x=80, y=190), and standardised on its own:
    // (pixel - frame mean) / frame standard deviation. The standard deviation is
    // the population one reported by cv::meanStdDev; a frame with zero deviation
    // becomes all zeros instead of dividing by zero.
    //
    // Terminates the process with exit(-1) if the video cannot be opened or a
    // frame is too small to contain the crop window. A video without frames
    // yields a tensor of shape (0, 46, 140).
    torch::Tensor load_video(const std::string& path) {
        const int kCropX = 80;
        const int kCropY = 190;
        const int kCropWidth = 140;
        const int kCropHeight = 46;

        cv::VideoCapture cap(path);
        if (!cap.isOpened()) {
            cerr << "Error opening video file" << endl;
            exit(-1);
        }

        vector<cv::Mat> frames;
        cv::Mat frame;
        while (cap.read(frame)) {
            // cv::Rect outside the image would make the ROI operator throw.
            if (frame.cols < kCropX + kCropWidth || frame.rows < kCropY + kCropHeight) {
                cerr << "Video frame is smaller than the crop region" << endl;
                exit(-1);
            }
            cv::Mat gray;
            cv::cvtColor(frame, gray, cv::COLOR_BGR2GRAY);
            // clone() keeps only the cropped pixels alive, not the full frame.
            frames.push_back(gray(cv::Rect(kCropX, kCropY, kCropWidth, kCropHeight)).clone());
        }
        cap.release();

        auto options = torch::TensorOptions().dtype(torch::kFloat32).device(torch::kCPU).layout(torch::kStrided).requires_grad(false);
        torch::Tensor tensor = torch::empty({ static_cast<int64_t>(frames.size()), kCropHeight, kCropWidth }, options);

        // Write every normalised frame straight into the tensor's storage. A
        // vector<cv::Mat> holds matrix headers, not contiguous pixel data, so it
        // must not be handed to torch::from_blob.
        float* out = tensor.data_ptr<float>();
        const size_t frame_elems = static_cast<size_t>(kCropHeight) * kCropWidth;
        for (size_t i = 0; i < frames.size(); ++i) {
            cv::Scalar mean, stddev;
            cv::meanStdDev(frames[i], mean, stddev);
            const double scale = stddev[0] > 0.0 ? 1.0 / stddev[0] : 1.0;

            // Header over the i-th slice of the tensor; convertTo fills it in
            // place because size and type already match.
            cv::Mat dst(kCropHeight, kCropWidth, CV_32F, out + i * frame_elems);
            frames[i].convertTo(dst, CV_32F, scale, -mean[0] * scale);
        }

        return tensor;
    }

    // Encodes `chars` as a 1-D int64 tensor holding, for each character, its
    // position in `vocab`. When a character appears more than once in `vocab`
    // the last position is used; a character absent from `vocab` encodes to 0.
    torch::Tensor char_to_num(const vector<char>& chars, const vector<char>& vocab) {
        // Character -> position table (later duplicates overwrite earlier ones).
        map<char, int> index_of;
        for (size_t pos = 0; pos < vocab.size(); ++pos) {
            index_of[vocab[pos]] = pos;
        }

        vector<int> encoded;
        for (const char ch : chars) {
            const auto hit = index_of.find(ch);
            encoded.push_back(hit != index_of.end() ? hit->second : 0);
        }
        return torch::tensor(encoded, torch::kLong);
    }

    // Decodes the class indices in `nums` into characters: index i becomes
    // vocab[i]. `nums` may have any shape and is read in flattened order.
    // An index outside [0, vocab.size()) decodes to '\0'.
    vector<char> num_to_char(const torch::Tensor& nums, const vector<char>& vocab) {
        // Flatten to a contiguous int64 CPU tensor so the ids can be read
        // through a plain pointer instead of iterating the tensor object.
        const torch::Tensor flat = nums.reshape({ -1 }).to(torch::kCPU, torch::kLong).contiguous();
        const int64_t count = flat.numel();
        const int64_t* ids = flat.data_ptr<int64_t>();
        const int64_t vocab_size = static_cast<int64_t>(vocab.size());

        vector<char> chars;
        chars.reserve(static_cast<size_t>(count));
        for (int64_t i = 0; i < count; ++i) {
            const int64_t id = ids[i];
            const bool known = id >= 0 && id < vocab_size;
            chars.push_back(known ? vocab[static_cast<size_t>(id)] : '\0');
        }
        return chars;
    }

    // Splits `line` at every occurrence of `del` and returns the fields in
    // order. The delimiter itself is not part of any field. Consecutive
    // delimiters, or a delimiter at either end, produce empty fields, so the
    // result always has (number of delimiters + 1) entries; an empty `line`
    // yields one empty field.
    std::vector<std::string> split(std::string line, char del) {
        std::vector<std::string> splits;
        std::string field;
        for (char c : line) {
            if (c == del) {
                // Close the current field *before* consuming the delimiter so
                // that the delimiter is not appended to it.
                splits.push_back(field);
                field.clear();
            } else {
                field += c;
            }
        }
        splits.push_back(field);
        return splits;
    }

    vector<std::string> load_alignments(std::string path) {
        ifstream align(path);
        std::string line;
        vector<std::string> lines;
        vector<std::string> temp;
        while (getline(align, line)) {
            temp = split(line, ' ');
            if (temp[2] != "sil") {
                lines.push_back(temp[2]);
            }
        }
        vector<char> tokens;
        for (std::string s : lines) {
            for (char c : s) {
                tokens.push_back(c);
            }
            tokens.push_back(' ');
        }
        tokens.pop_back();
        return char_to_num(tokens, vocab);
    }



