In [1]:
#Webtoon_Analysis
#Comments_Sentiments Classification
#Web_Scraping

In [None]:
import requests
from bs4 import BeautifulSoup
from transformers import pipeline
import re

# Initialize the Hugging Face summarization pipeline once, at module level, so
# that every call to summarize_content() reuses the same loaded model.
# No model name is passed here, so the checkpoint is whatever the installed
# transformers version selects by default for the "summarization" task.
summarizer = pipeline("summarization")

# Function to determine the genre based on keyword matches
def determine_genre(text):
    """Return the first genre whose keyword list has a whole-word hit in `text`.

    Genres are checked in the fixed order romance, action, fantasy, comedy;
    matching is case-insensitive and anchored on word boundaries, so "lovely"
    does not count as "love". Returns "Unknown" when no keyword is found.
    """
    keyword_table = (
        ("romance", ("love", "romantic", "relationship", "heart")),
        ("action", ("fight", "battle", "war", "adventure")),
        ("fantasy", ("magic", "fantasy", "supernatural", "dream")),
        ("comedy", ("funny", "humor", "comedy", "laugh")),
    )
    for label, words in keyword_table:
        for word in words:
            # First hit wins: earlier genres take priority over later ones.
            if re.search(rf'\b{word}\b', text, re.IGNORECASE):
                return label
    return "Unknown"

# Function to summarize content in chunks
def summarize_content(content, max_length=1024, min_words=50):
    """Summarize `content` chunk by chunk and join the pieces with spaces.

    Args:
        content: Text to summarize. None, empty, or whitespace-only input
            yields an empty string.
        max_length: Maximum number of characters per chunk handed to the
            summarizer.
        min_words: Chunks with this many words or fewer are not sent to the
            model; they are kept verbatim so their text is not lost.

    Returns:
        A single string: the model summary of every long chunk and the
        original text of every short chunk, in document order.
    """
    import textwrap  # local import keeps this function self-contained

    if not content or not content.strip():
        return ''

    # Wrap on whitespace so no word is cut in half at a chunk boundary
    # (plain fixed-width slicing would split words and feed fragments to the model).
    content_chunks = textwrap.wrap(
        content,
        width=max_length,
        break_long_words=True,   # a single over-long token is still hard-split
        break_on_hyphens=False,
    )

    summaries = []
    for chunk in content_chunks:
        if len(chunk.split()) > min_words:
            summary = summarizer(chunk, max_length=100, min_length=30, do_sample=False)[0]['summary_text']
            summaries.append(summary)
        else:
            # Too short to summarize meaningfully; keep it as-is instead of
            # dropping it, so short pages and article tails still contribute.
            summaries.append(chunk)
    return ' '.join(summaries)

# Function to scrape, summarize, and determine genre from a webpage
def scrape_and_analyze(url, timeout=10):
    """Download a page, summarize its paragraph text, and guess its genre.

    Args:
        url: Address of the article to fetch.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        A `(summary, genre)` tuple: the output of summarize_content() and
        determine_genre() applied to the joined text of all <p> elements.

    Raises:
        requests.exceptions.RequestException: on connection problems,
            timeouts, or a 4xx/5xx response.
    """
    # Step 1: Scrape the webpage
    response = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors rather than summarizing an error page
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # Step 2: Extract all paragraphs
    paragraphs = [p.text for p in soup.find_all('p')]
    content = ' '.join(paragraphs)

    # Step 3: Summarize the content
    summary = summarize_content(content)

    # Step 4: Determine the genre from the full text (not the summary)
    genre = determine_genre(content)

    return summary, genre

# Example usage
# url = 'https://animemangatoon.com/top-anime-and-k-drama-like-true-beauty/'
# summary, genre = scrape_and_analyze(url)
urls = [
    'https://animemangatoon.com/top-anime-and-k-drama-like-true-beauty/',
    'https://animemangatoon.com/i-love-yoo-shin-ae-and-yeong-gis-path/',
    'https://animemangatoon.com/operation-true-love-su-ae-and-minu/',
    'https://animemangatoon.com/operation-true-love-su-ae-and-eunhyeok/',
    'https://animemangatoon.com/refund-high-school-the-beginning-of-the-second-year/',
]
# Add more article URLs to the list above as needed; five are included.

# One record per URL: {'description': <summary text>, 'genre': <genre label>}
webtoon_data = []
for url in urls:
    summary, genre = scrape_and_analyze(url)
    # Build the record as a literal; naming a variable `dict` would shadow the
    # builtin for every cell that runs afterwards in this kernel.
    webtoon_data.append({'description': summary, 'genre': genre})


# Note: this cell is slow to run -- it downloads every page and runs the summarization model on it.


In [61]:
# for dct in webtoon_data:
#   print(dct['description'][:100], dct['genre'])

# Uncomment the loop above to spot-check the scraped summaries and genres.

In [None]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Split the scraped records into parallel lists: model inputs (summary text)
# and targets (genre labels).
descriptions = list(map(lambda record: record['description'], webtoon_data))
genres = list(map(lambda record: record['genre'], webtoon_data))

print(descriptions)
print(genres)

# Bag-of-words features: one row per description, one column per vocabulary term
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(descriptions)
# print(X)



In [None]:
# Hold out 20% of the samples for evaluation. The seed is fixed so the split
# (and therefore every downstream result) is identical on each run.
X_train, X_test, y_train, y_test = train_test_split(X, genres, test_size=0.2, random_state=42)

# Seed the tree as well: tie-breaking between equally good splits is randomized.
classifier = DecisionTreeClassifier(random_state=42)
classifier.fit(X_train, y_train)


In [None]:
# Predict a genre label for each held-out description and show the result
y_pred = classifier.predict(X_test)
print(y_pred)

#Marks The End of Task-1

In [None]:
from textblob import TextBlob

# Sample comments used to exercise the sentiment labelling below
comments = [
    "I love this story, it's amazing!",
    "This is the worst webtoon I've ever read.",
    "Happy sadd sad sad sad sad"
]


def label_sentiment(polarity):
    """Map a polarity score to 'positive' (> 0), 'negative' (< 0) or 'neutral' (== 0)."""
    if polarity > 0:
        return 'positive'
    if polarity < 0:
        return 'negative'
    # A score of exactly zero carries no sentiment signal; do not call it negative.
    return 'neutral'


for comment in comments:
    blob = TextBlob(comment)
    sentiment = label_sentiment(blob.sentiment.polarity)
    print(f"Comment: {comment}, Sentiment: {sentiment}")


#Task 2 Complete.

In [77]:
import requests
from bs4 import BeautifulSoup
import google.generativeai as genai

# Configure the API key for Gemini.
# The key is read from the GEMINI_API_KEY environment variable, falling back to
# a hidden prompt, so the secret is never stored in the notebook itself.
import os
from getpass import getpass

gem_api_key = os.environ.get("GEMINI_API_KEY") or getpass("Gemini API key: ")
genai.configure(api_key=gem_api_key)

# Function to scrape relevant content from the webpage
def scrape_castle_swimmer(
    url="https://animemangatoon.com/castle-swimmer-unveiling-new-prophecy/",
    timeout=10,
):
    """Fetch an article and return the text of all its <p> elements as one string.

    Args:
        url: Page to download; defaults to the Castle Swimmer article.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The paragraph texts joined with single spaces, used as chatbot context.

    Raises:
        requests.exceptions.RequestException: on connection problems,
            timeouts, or a 4xx/5xx response.
    """
    response = requests.get(url, timeout=timeout)
    # Do not hand an HTTP error page to the chatbot as if it were the article
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract relevant paragraphs for context
    paragraphs = [p.text for p in soup.find_all('p')]
    content = ' '.join(paragraphs)  # Combine all paragraphs
    return content

# Function to generate a response using Gemini
def get_response_from_gemini(user_query, context):
    """Ask Gemini to answer `user_query` using `context` as background text.

    Args:
        user_query: The question typed by the user.
        context: Reference text (the scraped article) the answer should rely on.

    Returns:
        The text of the model's reply.
    """
    generation_config = {
        "temperature": 1,
        "top_p": 0.95,
        "top_k": 64,
        "max_output_tokens": 8192,
        "response_mime_type": "text/plain",
    }

    model = genai.GenerativeModel(
        model_name="gemini-1.5-pro",
        generation_config=generation_config,
    )

    # Each question is a single stateless request that embeds both the query
    # and the context, so no chat session is needed.
    prompt = (
        f"answer the question {user_query}, based on {context} "
    )

    response = model.generate_content(prompt)

    return response.text


# Main chatbot loop
if __name__ == "__main__":
    # Scrape once up front; every question reuses the same article text.
    content = scrape_castle_swimmer()

    while True:
        # Strip so that inputs like " exit " or "exit\t" still end the session
        user_query = input("Ask me anything about Castle Swimmer (type 'exit' to quit): ").strip()
        if user_query.lower() == 'exit':
            break
        if not user_query:
            # Nothing typed: ask again instead of spending an API call on an empty question
            continue
        response = get_response_from_gemini(user_query, content)
        print("AI:", response)


# Task 3 completed.

Ask me anything about Castle Swimmer (type 'exit' to quit): exit
