In [1]:
from dotenv import load_dotenv
import base64
from io import BytesIO
from PIL import Image
from groq import Groq
load_dotenv()

True

In [3]:
# Relative path of the photo that is opened below and sent to the vision model.
image_path ="image/acne.jpg"

def extract_text_from_image(image,format):
    """Serialize ``image`` into a base64 ``data:`` URL.

    Despite its name, this function does not extract text: it saves the image
    into an in-memory buffer in the requested format and base64-encodes the
    resulting bytes.

    Args:
        image: Object exposing ``save(fp, format=...)`` (a PIL image here).
        format: Image format name handed to ``image.save`` (e.g. ``"JPEG"``);
            its lowercase form is used as the MIME subtype.

    Returns:
        A string of the form ``data:image/<format>;base64,<payload>``.
    """
    # Keep everything in memory; nothing is written to disk.
    with BytesIO() as stream:
        image.save(stream, format=format)
        raw_bytes = stream.getvalue()

    payload = base64.b64encode(raw_bytes).decode()
    mime_subtype = format.lower()
    return "data:image/" + mime_subtype + ";base64," + payload

# Open the photo inside a context manager so the underlying file handle is
# closed as soon as the image has been encoded.
with Image.open(image_path) as image:
    pic = extract_text_from_image(image,format="JPEG")

In [5]:
# Model id passed to every chat completion request below.
model = "meta-llama/llama-4-scout-17b-16e-instruct"

# Groq() is built without arguments; presumably it picks up the API key from
# the environment populated by load_dotenv() - confirm against the Groq SDK.
client = Groq()

In [7]:
# One user turn that pairs the question with the base64-encoded photo.
query = "what is wrong with my skin"
messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": query},
            {"type": "image_url", "image_url": {"url": pic}},
        ],
    }
]

In [8]:
# Send the text + image prompt and print the content of the first choice.
chat_completion = client.chat.completions.create(model=model, messages=messages)
print(chat_completion.choices[0].message.content)

I'm not a doctor, but I can try to help you identify some possible causes of skin issues. Based on the image, it appears that the person has some red, inflamed bumps on their cheek and jawline, which could be acne, rosacea, or another skin condition.

To better understand what might be going on with your skin, could you tell me more about your skin concerns? For example:

* What are your skin issues (acne, dryness, sensitivity, etc.)?
* Have you recently changed your skincare routine or products?
* Do you have any allergies or sensitivities?
* Have you noticed any triggers that make your skin worse?

Keep in mind that it's always best to consult a dermatologist for personalized advice and diagnosis. They can help you determine the underlying cause of your skin issues and recommend the most effective treatment plan.


# Voice Record

In [1]:
import speech_recognition as sr
from pydub import AudioSegment
from io import BytesIO

In [2]:
import logging
# Emit INFO-and-above records as "<timestamp> - <level> - <message>".
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)

In [13]:

def record_audio(file_path, timeout=25, phrase_time_limit=None):
    """Record one phrase from the microphone and save it as an MP3 file.

    The recording is captured with ``speech_recognition``, converted from WAV
    bytes to a pydub ``AudioSegment`` and exported to ``file_path`` as a
    128 kbit/s MP3. Any exception raised along the way is logged and
    swallowed, so the function always returns ``None``.

    Args:
        file_path: Destination path of the MP3 file.
        timeout: Passed to ``Recognizer.listen`` as ``timeout``; per the
            SpeechRecognition documentation this is the number of seconds to
            wait for speech to start. Defaults to 25.
        phrase_time_limit: Passed to ``Recognizer.listen`` as
            ``phrase_time_limit``; per the SpeechRecognition documentation it
            caps the length of the recorded phrase in seconds. ``None`` (the
            default) leaves the length unbounded.
    """
    recognizer = sr.Recognizer()
    try:
        with sr.Microphone() as source:
            logging.info("adjusting the ambient noise")
            recognizer.adjust_for_ambient_noise(source,duration=1)
            logging.info("start speaking now....")

            audio = recognizer.listen(
                source=source,
                timeout=timeout,
                phrase_time_limit=phrase_time_limit,
            )
            logging.info("Recording completed")

        # The microphone is released at this point; the conversion below only
        # needs the captured bytes.
        # Convert WAV bytes to AudioSegment
        wav_data = audio.get_wav_data()
        audio_segment = AudioSegment.from_file(BytesIO(wav_data),format="wav")

        # Export as mp3
        audio_segment.export(file_path,format="mp3",bitrate="128k")
        logging.info(f"Audio File saved to {file_path}")

    except Exception as e:
        # Best-effort: report the failure instead of interrupting the notebook.
        logging.error(f"Error occured: {e}")

# Capture one utterance from the microphone into an MP3 in the working directory.
audio_path = "output.mp3"
record_audio(file_path=audio_path)


2025-05-23 00:08:48,225 - INFO - adjusting the ambient noise
2025-05-23 00:08:49,242 - INFO - start speaking now....
2025-05-23 00:08:56,723 - INFO - Recording completed
2025-05-23 00:08:56,941 - INFO - Audio File saved to output.mp3


## Speech To Text Model

In [14]:
client = Groq()
model_stt = "whisper-large-v3-turbo"

# Open the recording in a context manager so the file handle is closed once
# the upload has finished, instead of staying open for the kernel's lifetime.
with open(audio_path,"rb") as audio_file:
    transcription = client.audio.transcriptions.create(
        model=model_stt,
        file=audio_file,
        language="en"
    )
print(transcription.text)

2025-05-23 00:09:29,178 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/audio/transcriptions "HTTP/1.1 200 OK"


 Hello, how are you? Good morning, Mr. I am Hashim Manzoo.


# Text To Speech

In [None]:
import os
from gtts import gTTS


### Play Audio Automatically as It Is Generated

In [None]:
import subprocess
import platform

def play_audio(file_path):
    """Play an audio file with a command-line player chosen by operating system.

    Uses PowerShell's ``Media.SoundPlayer`` on Windows, ``afplay`` on macOS
    and ``mpg123`` on Linux. Each player runs via ``subprocess.run`` with
    ``check=True``, so the call blocks until the player process exits and a
    non-zero exit status is reported. Errors are printed rather than raised.

    Args:
        file_path: Path of the audio file to play.
    """
    system = platform.system()

    try:
        if system == "Windows":
            # PowerShell escapes a single quote inside a single-quoted string
            # by doubling it; without this a path containing "'" would break
            # (or inject into) the command.
            quoted_path = str(file_path).replace("'", "''")
            # NOTE: System.Media.SoundPlayer is documented as a .wav player, so
            # MP3 files are not expected to play through this branch.
            subprocess.run([
                "powershell",
                "-c",
                f"(New-Object Media.SoundPlayer '{quoted_path}').PlaySync();"
            ], check=True)
        elif system == "Darwin":
            subprocess.run(['afplay', file_path], check=True)
        elif system == "Linux":
            subprocess.run(['mpg123', file_path], check=True)
        else:
            print("Unsupported OS.")
    except Exception as e:
        # Playback is best-effort: report the problem and keep the notebook running.
        print(f"Error: {e}")


## Google Text-to-Speech (gTTS)

In [14]:
def text_to_speech_gtts(input_text,file_path):
    """Synthesize ``input_text`` with gTTS, save it and play it back.

    Args:
        input_text: Text to convert to speech (requested with ``lang="en"``
            and ``slow=False``).
        file_path: Where the generated audio is saved; the same path is then
            handed to ``play_audio``.
    """
    tts_options = {"text": input_text, "lang": "en", "slow": False}
    speech = gTTS(**tts_options)
    speech.save(file_path)
    play_audio(file_path)


# Neutral demo sentence to synthesize.
input_text = "Hello, this is a short test of the Google text to speech voice."  #input text
ai_save_path ="ai_audio/ai.mp3"

# Create the output directory from the save path itself so the directory name
# and the file path cannot drift apart.
os.makedirs(os.path.dirname(ai_save_path),exist_ok=True)   # create directory
text_to_speech_gtts(input_text=input_text,file_path=ai_save_path)

## Eleven Labs Audio

#### Eleven Labs Audio Model

In [9]:
# Check the available voices

from elevenlabs.client import ElevenLabs
# Use a dedicated name so the Groq `client` defined in earlier cells is not
# replaced by an object of a different type.
elevenlabs_client = ElevenLabs()

# List available voices
voices = elevenlabs_client.voices.get_all()

for voice in voices.voices:
    print(f"{voice.name}: {voice.voice_id}")


Aria: 9BWtsMINqrJLrRacOk9x
Sarah: EXAVITQu4vr4xnSDxMaL
Laura: FGY2WhTYpPnrIDTdsKH5
Charlie: IKne3meq5aSn9XLyUdCD
George: JBFqnCBsd6RMkjVDRZzb
Callum: N2lVS1w4EtoT3dr4eOWO
River: SAz9YHcvj6GT2YYXdXww
Liam: TX3LPaxmHKxFdv7VOQHJ
Charlotte: XB0fDUnXU5powFXDhCwa
Alice: Xb7hH8MSUJpSbSDYk0k2
Matilda: XrExE9yKIg1WjnnlVkGX
Will: bIHbv24MWmeRgasZH58o
Jessica: cgSgspJ2msm6clMCkdW9
Eric: cjVigY5qzO86Huf0OWal
Chris: iP95p4xoKVk53GoZ742B
Brian: nPczCjzI2devNBz1zQrb
Daniel: onwK4e9ZLuTAKqWW03F9
Lily: pFZP5JQG7iQjIQuC4Bku
Bill: pqHfZKP75CvOlQylNhV4


In [None]:
from elevenlabs.client import ElevenLabs

def text_to_speech_elevenlabs(input_text,file_path,voice_id="JBFqnCBsd6RMkjVDRZzb",model_id="eleven_turbo_v2"):
    """Synthesize ``input_text`` with ElevenLabs, save it as MP3 and play it back.

    Args:
        input_text: Text to convert to speech.
        file_path: Destination of the generated audio; it is written in
            binary mode and then handed to ``play_audio``.
        voice_id: ElevenLabs voice id. The default is the id printed for
            "George" by the voice-listing cell.
        model_id: ElevenLabs model id. Defaults to ``"eleven_turbo_v2"``.
    """
    client = ElevenLabs()
    audio = client.text_to_speech.convert(
        text=input_text,
        voice_id=voice_id,
        model_id=model_id,
        output_format="mp3_44100_128"
    )
    # The result is iterated chunk by chunk, so it is written to disk incrementally.
    with open(file_path,"wb") as f:
        for chunk in audio:
            f.write(chunk)
    play_audio(file_path)

# Synthesize a short self-introduction with ElevenLabs and play it back.
ai_save_path = "ai_audio/eleven_labs.mp3"
input_text = "Hi my name is haseeb and i am from pakistan and i have BS in Electrical engineering"
text_to_speech_elevenlabs(input_text, ai_save_path)
    
