# Imports

In [14]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tkinter as tk
from tkinter import filedialog
import easygui
import speech_recognition as sr
import pydub
from googletrans import Translator
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForSeq2SeqLM

sns.set()

# Speech to Text

In [3]:
def convert_audio_to_text(audio_file):
    """Transcribe an audio file with the Google Web Speech API.

    Parameters
    ----------
    audio_file
        Passed straight to ``sr.AudioFile``.

    Returns
    -------
    str
        The recognized text. When recognition fails, the reason is printed
        and an empty string is returned, so callers can detect failure with
        a simple truthiness check instead of mistaking the error message
        for a transcript.
    """
    recognizer = sr.Recognizer()

    with sr.AudioFile(audio_file) as source:
        print("File is being read......")
        audio = recognizer.record(source)  # Read the entire audio file

    try:
        # Use Google Web Speech API for recognition
        return recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        print("Google Web Speech API could not understand the audio")
    except sr.RequestError as e:
        print(f"Could not request results from Google Web Speech API; {e}")

    # Reached only after a handled failure: signal "no transcript" with a
    # falsy value rather than returning the error text as if it were speech.
    return ""

In [5]:
# Transcribe the recording and, if anything came back, persist it to disk
# so the later cells can read it from conv.txt.
audio_file = "MainAudio.wav"
text = convert_audio_to_text(audio_file)

if not text:
    print("No text could be transcribed.")
else:
    print("Transcription:", text, sep="\n")

    file_path = "conv.txt"
    with open(file_path, 'w') as file:
        file.write(text)

    print(f'Text saved to {file_path}')

File is being read......
Transcription:
American accent in 10 seconds is it is saying is he nice say easy nice easy nice easy nice nice
Text saved to conv.txt


# Translation

In [8]:
# Load the transcript written to conv.txt by the speech-to-text step.
with open("conv.txt", 'r') as file:
    text = file.read()

# Menu option -> (language name used in the output line, googletrans code).
languages = {1: ("Arabic", "ar"), 2: ("Turkish", "tr")}

print("In which language do you want to convert your text to?")
prompt = "Enter 1 for Arabic and 2 for Turkish: "
choice = None
while choice not in languages:
    try:
        choice = int(input(prompt))
    except ValueError:
        # Non-numeric input is just another wrong answer: re-prompt instead
        # of letting int() abort the cell.
        choice = None
    prompt = "Wrong input try again: "

language_name, language_code = languages[choice]

# One shared code path for every target language; only the code and the
# label in the printed line differ.
translator = Translator()
translation = translator.translate(text, src='en', dest=language_code)
print(f'English to {language_name}: {translation.text}')


In which language do you want to convert your text to?
English to Turkish: Amerikan aksanı 10 saniye içinde mi diyor mu?


# Finding Number of People

In [None]:
import subprocess

# Path to the LIUM SpkDiarization tool
lium_spkdiarization_path = '/path/to/lium_spkdiarization.sh'

# Path to the audio file you want to analyze
audio_file_path = 'path/to/audio.wav'

# Call the LIUM SpkDiarization tool. check=True raises CalledProcessError on
# a non-zero exit status, so a failed run cannot silently fall through to
# reading a missing (or stale, from an earlier run) output.segments.
command = [lium_spkdiarization_path, '--fInputMask=' + audio_file_path, '--sOutputMask=output.segments']
subprocess.run(command, check=True)

# Read the output file containing speaker information
with open('output.segments', 'r') as f:
    lines = f.readlines()

# Count the number of speakers. The speaker label is read from the 8th
# whitespace-separated field of each segment line. Blank lines, lines with
# fewer than 8 fields, and ";;" comment lines are skipped so they neither
# raise IndexError nor get counted as extra speakers.
# NOTE(review): the ";;" comment convention is taken from the LIUM
# segmentation format; confirm against the output of the tool version in use.
speakers = set()
for line in lines:
    fields = line.split()
    if len(fields) < 8 or fields[0].startswith(';;'):
        continue
    speakers.add(fields[7])

num_speakers = len(speakers)
print(f'Number of speakers: {num_speakers}')


# Sentiment Analysis

### NLTK Approach

In [11]:
# Score the transcript with NLTK's VADER analyzer and report the positive
# and negative components as percentages.
nltk.download('vader_lexicon')
analyzer = SentimentIntensityAnalyzer()
scores = analyzer.polarity_scores(text)
positive_percent, negative_percent = (scores[key] * 100 for key in ('pos', 'neg'))

print("Sentiment Analysis Results:")
for label, percent in (("Positive", positive_percent), ("Negative", negative_percent)):
    print(f"{label} Sentiment: {percent:.2f}%")

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\haris\AppData\Roaming\nltk_data...


Sentiment Analysis Results:
Positive Sentiment: 65.40%
Negative Sentiment: 0.00%


### DistilBERT (SST-2 fine-tuned) approach

In [13]:
# The tokenizer and the classification model are loaded from the same
# checkpoint name, so keep that name in one place.
sentiment_checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(sentiment_checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(sentiment_checkpoint)

Downloading (…)okenizer_config.json: 100%|██████████| 48.0/48.0 [00:00<?, ?B/s]
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Downloading (…)lve/main/config.json: 100%|██████████| 629/629 [00:00<?, ?B/s] 
Downloading (…)solve/main/vocab.txt: 100%|██████████| 232k/232k [00:00<00:00, 337kB/s]
Downloading model.safetensors: 100%|██████████| 268M/268M [09:55<00:00, 450kB/s] 


In [15]:
# Tokenize the transcript into PyTorch tensors. truncation=True caps the
# input at the tokenizer's maximum model length, so a long transcript is cut
# to fit instead of failing inside the model with an out-of-range position.
inputs = tokenizer(text=text, return_tensors="pt", truncation=True)

# Inference only: no gradients are needed.
with torch.no_grad():
    logits = model(**inputs).logits

# Index of the highest-scoring class, mapped to its label via the model config.
predicted_class_id = logits.argmax().item()
temp = model.config.id2label[predicted_class_id]

In [16]:
# Display the label looked up from model.config.id2label in the previous cell.
temp

'POSITIVE'

# Summary

In [18]:
# Load the summarization tokenizer and seq2seq model from one checkpoint name.
# NOTE(review): this rebinds `tokenizer` and `model`, which the sentiment
# cells above also use; re-running those cells after this one would pick up
# these objects instead.
summary_checkpoint = "google/pegasus-xsum"
tokenizer = AutoTokenizer.from_pretrained(summary_checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(summary_checkpoint)

Downloading (…)/main/tokenizer.json: 100%|██████████| 3.52M/3.52M [00:04<00:00, 811kB/s]
Downloading (…)cial_tokens_map.json: 100%|██████████| 65.0/65.0 [00:00<?, ?B/s]


KeyboardInterrupt: 