<a href="https://colab.research.google.com/github/AksharaVaidee/2022305027/blob/main/spam%20detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install necessary packages
!pip install gradio pandas scikit-learn

# Import libraries
import gradio as gr
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline

# Source CSV for the training data.
# NOTE(review): the repository is named "Twitter_Sentiment_Analysis", so these
# are presumably tweet labels rather than genuine spam labels -- confirm the
# dataset matches the "spam" wording used by the UI below.
url = "https://raw.githubusercontent.com/dD2405/Twitter_Sentiment_Analysis/master/train.csv"

# Read the CSV, keep only the two columns the model needs, and expose the
# text column under the name "message".
df = (
    pd.read_csv(url, encoding='latin-1')
    .loc[:, ['label', 'tweet']]
    .rename(columns={'tweet': 'message'})
)

# Labels 0 and 1 map to themselves (predict_spam reports 1 as "Spam");
# any other value becomes NaN.
df['label'] = df['label'].map({0: 0, 1: 1})

# Hold out 20% of the rows; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    df["message"],
    df["label"],
    test_size=0.2,
    random_state=42,
)

# TF-IDF features feeding a multinomial Naive Bayes classifier,
# fitted on the training split (fit returns the pipeline itself)
model = make_pipeline(TfidfVectorizer(), MultinomialNB()).fit(X_train, y_train)

# Classify a single message with the trained pipeline
def predict_spam(text):
    """Return "Spam" when the model predicts label 1 for `text`, else "Not Spam"."""
    # The pipeline expects a batch, so wrap the message in a one-element list
    predicted_label = model.predict([text])[0]
    if predicted_label == 1:
        return "Spam"
    return "Not Spam"

# Build the Gradio front end: one text box in, the predicted label out
message_box = gr.Textbox(lines=2, placeholder="Enter a message...")
iface = gr.Interface(
    title="Spam Detector",
    description="Enter a message to check if it's spam or not.",
    fn=predict_spam,
    inputs=message_box,
    outputs="text",
)

# Start the app; share=True asks Gradio for a public share link
iface.launch(share=True)

Collecting gradio
  Downloading gradio-5.23.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 



In [None]:
# Install necessary packages
!pip install gradio transformers sentencepiece

# Import libraries
import gradio as gr
from transformers import MarianMTModel, MarianTokenizer

# Display name -> two-letter code; the code is used as the suffix of the
# "Helsinki-NLP/opus-mt-en-<code>" model name built in load_model
lang_codes = dict(
    Tamil="ta",
    Hindi="hi",
    French="fr",
    Spanish="es",
    German="de",
    Dutch="nl",
)

# (model, tokenizer) pairs that have already been loaded, keyed by language code.
# Each entry stays in memory for the life of the kernel.
_loaded_models = {}

# Function to load model dynamically based on language
def load_model(target_lang):
    """Return the English -> `target_lang` MarianMT model and its tokenizer.

    The pair is loaded from "Helsinki-NLP/opus-mt-en-<target_lang>" on first
    use and cached, so repeated translations into the same language do not
    reload the weights on every request.

    Args:
        target_lang: Language code used as the model-name suffix (e.g. "fr").

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    if target_lang not in _loaded_models:
        model_name = f"Helsinki-NLP/opus-mt-en-{target_lang}"
        tokenizer = MarianTokenizer.from_pretrained(model_name)
        model = MarianMTModel.from_pretrained(model_name)
        _loaded_models[target_lang] = (model, tokenizer)
    return _loaded_models[target_lang]

# Function for translation
def translate_text(text, target_language):
    """Translate English `text` into `target_language`.

    Args:
        text: English text to translate.
        target_language: Display name that must be a key of ``lang_codes``.

    Returns:
        The translated text; "Unsupported language." when the language is
        not in ``lang_codes``; an empty string when `text` is empty or
        whitespace-only.
    """
    if target_language not in lang_codes:
        return "Unsupported language."

    # Nothing to translate: skip the model load and generation entirely
    if not text or not text.strip():
        return ""

    target_lang_code = lang_codes[target_language]
    model, tokenizer = load_model(target_lang_code)

    # truncation=True caps the input at the tokenizer's maximum length
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    translated = model.generate(**inputs)
    # A single input string yields a single output sequence
    translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)

    return translated_text

# Build the Gradio front end: English text plus a language picker in,
# translated text out
source_text_box = gr.Textbox(lines=3, placeholder="Enter text in English...")
language_picker = gr.Dropdown(list(lang_codes.keys()), label="Select Target Language")
iface = gr.Interface(
    title="Language Translator",
    description="Translate English text to Tamil, Hindi, French, Spanish, German, or Dutch.",
    fn=translate_text,
    inputs=[source_text_box, language_picker],
    outputs="text",
)

# Start the app; share=True asks Gradio for a public share link
iface.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://2f700d0b3d21045c3a.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
# Install necessary packages
!pip install gradio transformers

# Import libraries
import gradio as gr
from transformers import pipeline

# Load the Mistral-7B-Instruct text-generation model.
# NOTE(review): the recorded output of this cell shows an OSError because the
# repository is gated -- access and Hugging Face authentication are required.
chatbot = pipeline(task="text-generation", model="mistralai/Mistral-7B-Instruct-v0.1")

# Function to answer TB-related questions
def tb_chatbot(user_query):
    """Generate an answer to a tuberculosis question.

    Args:
        user_query: The question typed by the user.

    Returns:
        The generated answer only, with surrounding whitespace removed.
    """
    prompt = f"Answer this question about tuberculosis in detail: {user_query}"
    response = chatbot(
        prompt,
        # Budget the answer itself; max_length would also count the prompt's
        # tokens, so a long question would leave little room for the answer.
        max_new_tokens=200,
        num_return_sequences=1,
        # Return only the continuation instead of echoing the prompt back
        return_full_text=False,
    )
    return response[0]["generated_text"].strip()

# Build the Gradio front end: a question box in, the generated answer out
question_box = gr.Textbox(lines=2, placeholder="Ask about tuberculosis...")
iface = gr.Interface(
    title="Tuberculosis Knowledge Chatbot",
    description="Ask anything about TB: symptoms, diagnosis, treatment, drug resistance, BCG vaccine, etc.",
    fn=tb_chatbot,
    inputs=question_box,
    outputs="text",
)

# Start the app; share=True asks Gradio for a public share link
iface.launch(share=True)



OSError: You are trying to access a gated repo.
Make sure to have access to it at https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1.
401 Client Error. (Request ID: Root=1-67e26d9e-5d653e21168e860255f3981b;593fd53e-60eb-4f65-ba9c-00a0fffaf7ec)

Cannot access gated repo for url https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1/resolve/main/config.json.
Access to model mistralai/Mistral-7B-Instruct-v0.1 is restricted. You must have access to it and be authenticated to access it. Please log in.

In [None]:
# Install necessary packages
!pip install gradio transformers torch

# Import libraries
import gradio as gr
from transformers import pipeline

# Text-to-text generation with google/flan-t5-xl, which loaded without
# authentication in the recorded run of this cell
chatbot = pipeline(task="text2text-generation", model="google/flan-t5-xl")

# Answer a tuberculosis question with the text2text pipeline
def tb_chatbot(user_query):
    """Return the generated text for a tuberculosis question.

    The question is embedded in a fixed instruction prompt, and a single
    sampled sequence is requested from the module-level ``chatbot``.
    """
    # Sampling and length settings forwarded unchanged to the pipeline call
    generation_options = dict(
        max_length=5120,
        min_length=100,
        do_sample=True,
        top_k=50,
        temperature=0.7,
        num_return_sequences=1,
    )
    prompt = f"Provide a detailed and informative response about tuberculosis: {user_query}"
    candidates = chatbot(prompt, **generation_options)
    first_candidate = candidates[0]
    return first_candidate["generated_text"]

# Build the Gradio front end: a question box in, the generated answer out
question_box = gr.Textbox(lines=3, placeholder="Ask about tuberculosis...")
iface = gr.Interface(
    title="Tuberculosis Knowledge Chatbot",
    description="Ask anything about TB: symptoms, diagnosis, treatment, drug resistance, BCG vaccine, etc.",
    fn=tb_chatbot,
    inputs=question_box,
    outputs="text",
)

# Start the app; share=True asks Gradio for a public share link
iface.launch(share=True)



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cpu


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://c0d5ef4edeeeb96c06.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
# Install required packages
!pip install gradio moviepy transformers torch gtts pydub

# Import libraries
import gradio as gr
import os
from moviepy.editor import VideoFileClip
from transformers import pipeline
from gtts import gTTS
from pydub import AudioSegment

# Speech recognition through the transformers pipeline API (Whisper large checkpoint)
asr_pipeline = pipeline(task="automatic-speech-recognition", model="openai/whisper-large")

# Display name -> two-letter code handed to gTTS in translate_video
lang_codes = dict(
    Tamil="ta",
    Hindi="hi",
    French="fr",
    Spanish="es",
    German="de",
    Dutch="nl",
)

# Translation pipeline.
# NOTE(review): the recorded output of this cell ends with an OSError stating
# that this identifier is not a valid model on the Hugging Face Hub, so the
# cell stops here as written.
translator = pipeline(task="translation", model="Helsinki-NLP/opus-mt-en-multi")

# Function to process video and translate audio
def translate_video(video_path, target_language):
    """Transcribe a video's speech, translate it, and synthesise it as audio.

    Args:
        video_path: Path of the uploaded video file.
        target_language: Display name that must be a key of ``lang_codes``.

    Returns:
        The path of the generated MP3 ("translated_audio.mp3"), or the string
        "Unsupported language selected." when `target_language` is unknown.

    Raises:
        ValueError: If the video has no audio track.
    """
    # Reject unknown languages before doing any expensive work
    if target_language not in lang_codes:
        return "Unsupported language selected."

    # Extract audio from video; always release the clip's file handles
    audio_path = "extracted_audio.wav"
    video = VideoFileClip(video_path)
    try:
        if video.audio is None:
            raise ValueError("The uploaded video has no audio track.")
        video.audio.write_audiofile(audio_path, codec="pcm_s16le")
    finally:
        video.close()

    # Transcribe audio
    transcript = asr_pipeline(audio_path)["text"]

    # Translate text
    # NOTE(review): the translator is called without any target-language hint,
    # so the output language depends entirely on the loaded model -- confirm.
    translated_text = translator(transcript, max_length=512)[0]["translation_text"]

    # Convert translated text to speech
    tts = gTTS(translated_text, lang=lang_codes[target_language])
    translated_audio_path = "translated_audio.mp3"
    tts.save(translated_audio_path)

    return translated_audio_path

# Build the Gradio front end: a video upload plus a language picker in,
# the synthesised audio out
video_input = gr.Video(label="Upload Video")
language_picker = gr.Dropdown(choices=list(lang_codes.keys()), label="Select Target Language")
iface = gr.Interface(
    title="Video Speech Translator",
    description="Upload a video, and the spoken content will be transcribed, translated, and converted to speech.",
    fn=translate_video,
    inputs=[video_input, language_picker],
    outputs=gr.Audio(label="Translated Audio"),
)

# Start the app; share=True asks Gradio for a public share link
iface.launch(share=True)



  if event.key is 'enter':



config.json:   0%|          | 0.00/1.99k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/6.17G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/3.85k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/283k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/836k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.48M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/494k [00:00<?, ?B/s]

normalizer.json:   0%|          | 0.00/52.7k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/34.6k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.19k [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/185k [00:00<?, ?B/s]

Device set to use cpu


OSError: Helsinki-NLP/opus-mt-en-multi is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`

In [None]:
# Install necessary packages
!pip install gradio moviepy transformers torch gtts pydub openai-whisper

# Import libraries
import gradio as gr
import os
from moviepy.editor import VideoFileClip
import whisper
from transformers import MarianMTModel, MarianTokenizer
from gtts import gTTS

# Speech recognition with the openai-whisper package ("base" checkpoint)
asr_model = whisper.load_model(name="base")

# Display name -> English-to-<language> MarianMT model identifier
translation_models = dict(
    Tamil="Helsinki-NLP/opus-mt-en-ta",
    Hindi="Helsinki-NLP/opus-mt-en-hi",
    French="Helsinki-NLP/opus-mt-en-fr",
    Spanish="Helsinki-NLP/opus-mt-en-es",
    German="Helsinki-NLP/opus-mt-en-de",
    Dutch="Helsinki-NLP/opus-mt-en-nl",
)

# Display name -> language code expected by gTTS. The first two letters of the
# display name are NOT a reliable code ("Spanish" -> "sp", "German" -> "ge",
# "Dutch" -> "du"), so the codes are listed explicitly.
_TTS_LANGUAGE_CODES = {
    "Tamil": "ta",
    "Hindi": "hi",
    "French": "fr",
    "Spanish": "es",
    "German": "de",
    "Dutch": "nl",
}

# Function to process video and translate audio
def translate_video(video_path, target_language):
    """Transcribe a video's speech, translate it, and synthesise it as audio.

    Args:
        video_path: Path of the uploaded video file.
        target_language: Display name that must be a key of
            ``translation_models``.

    Returns:
        The path of the generated MP3 ("translated_audio.mp3"), or the string
        "Unsupported language selected." when `target_language` is unknown.

    Raises:
        ValueError: If the video has no audio track.
    """
    if target_language not in translation_models:
        return "Unsupported language selected."

    # Extract audio from video; always release the clip's file handles
    audio_path = "extracted_audio.wav"
    video = VideoFileClip(video_path)
    try:
        if video.audio is None:
            raise ValueError("The uploaded video has no audio track.")
        video.audio.write_audiofile(audio_path, codec="pcm_s16le")
    finally:
        video.close()

    # Transcribe audio
    transcript = asr_model.transcribe(audio_path)["text"]

    # Load translation model & tokenizer
    model_name = translation_models[target_language]
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)

    # Translate text. truncation=True caps an over-long transcript at the
    # tokenizer's maximum length, as translate_text's tokenizer call does.
    encoded = tokenizer(transcript, return_tensors="pt", padding=True, truncation=True)
    translated_tokens = model.generate(**encoded)
    translated_text = tokenizer.decode(translated_tokens[0], skip_special_tokens=True)

    # Convert translated text to speech. Languages missing from the table fall
    # back to the previous two-letter heuristic.
    lang_code = _TTS_LANGUAGE_CODES.get(target_language, target_language[:2].lower())
    tts = gTTS(translated_text, lang=lang_code)
    translated_audio_path = "translated_audio.mp3"
    tts.save(translated_audio_path)

    return translated_audio_path

# Build the Gradio front end: a video upload plus a language picker in,
# the synthesised audio out
video_input = gr.Video(label="Upload Video")
language_picker = gr.Dropdown(choices=list(translation_models.keys()), label="Select Target Language")
iface = gr.Interface(
    title="Video Speech Translator",
    description="Upload a video, and the spoken content will be transcribed, translated, and converted to speech.",
    fn=translate_video,
    inputs=[video_input, language_picker],
    outputs=gr.Audio(label="Translated Audio"),
)

# Start the app; share=True asks Gradio for a public share link
iface.launch(share=True)

Collecting openai-whisper
  Downloading openai-whisper-20240930.tar.gz (800 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m800.5/800.5 kB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting tiktoken (from openai-whisper)
  Downloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Downloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m36.3 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: openai-whisper
  Building wheel for openai-whisper (pyproject.toml) ... [?25l[?25hdone
  Created wheel for openai-whisper: filename=openai_whisper-20240930-py3-none-any.whl size=803406 sha256=a31add62d0b2f5cbdb2f

100%|███████████████████████████████████████| 139M/139M [00:02<00:00, 62.6MiB/s]


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://6ae3b5b8aaf0952e57.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
# Install necessary packages
!pip install gradio gtts pydub

# Import required libraries
import gradio as gr
from gtts import gTTS
import os

# Function to convert text to speech
def text_to_speech(text, language):
    """Synthesise `text` as speech and return the path of the MP3 file.

    Args:
        text: The text to speak.
        language: Either a display name from ``language_options`` (what the
            dropdown supplies, e.g. "English") or a language code ("en").

    Returns:
        The path of the written file ("output_audio.mp3").
    """
    # The dropdown passes display names, but gTTS needs the language code;
    # values that are not display names are passed through unchanged.
    lang_code = language_options.get(language, language)

    # Convert text to speech
    tts = gTTS(text, lang=lang_code)

    # Save the output file
    output_path = "output_audio.mp3"
    tts.save(output_path)

    return output_path

# Display name shown in the dropdown -> two-letter language code
language_options = dict(
    English="en",
    Tamil="ta",
    Hindi="hi",
    French="fr",
    Spanish="es",
    German="de",
    Dutch="nl",
)

# Build the Gradio front end: text plus a language picker in, audio out
text_input = gr.Textbox(label="Enter Text")
language_picker = gr.Dropdown(choices=list(language_options.keys()), label="Select Language")
iface = gr.Interface(
    title="Text-to-Speech Converter",
    description="Enter text, select a language, and generate speech.",
    fn=text_to_speech,
    inputs=[text_input, language_picker],
    outputs=gr.Audio(label="Generated Speech"),
)

# Start the app; share=True asks Gradio for a public share link
iface.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://4d84534dfe013fa29a.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


