In [None]:
# prompt: write a programme that analyses all the oscar-winning movies and generates the next super hit film. The inputs are taken from the internet and include the locations, cast, script, plot, themes and storyline.

# install libraries
!pip install beautifulsoup4
!pip install requests

# import libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Scrape Oscar-winning movie data (replace with actual source)
url = "https://www.example-oscar-data.com"
REQUEST_TIMEOUT_SECONDS = 10  # requests waits indefinitely unless a timeout is passed

try:
    response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")
except requests.RequestException as exc:
    # The URL above is a placeholder, so this fetch fails until a real source
    # is configured. Fall back to an empty page so the remaining steps still
    # run (on an empty data set) instead of aborting the whole cell.
    print(f"Could not fetch {url}: {exc}")
    response = None
    soup = BeautifulSoup("", "html.parser")


def _tag_text(tag):
    """Return the stripped text of a parsed tag, or "" when the tag is missing."""
    return tag.text.strip() if tag is not None else ""


# Extract relevant features (locations, cast, script, plot, themes, storyline)
movies = []
for movie_data in soup.find_all("div", class_="movie-info"):
    title = _tag_text(movie_data.find("h2"))
    if not title:
        # Without a title the movie cannot be looked up later, so skip it.
        continue
    location = _tag_text(movie_data.find("span", class_="location"))
    cast = [actor.text.strip() for actor in movie_data.find_all("span", class_="actor")]
    # ... (extract other features similarly)
    movies.append({
        "title": title,
        "location": location,
        "cast": cast,
        # ... (other features)
    })

# Create a DataFrame
# Explicit columns keep the frame usable when nothing was scraped (an empty
# list would otherwise produce a frame with no columns at all). Extend this
# list when more features are added to the dictionaries above.
df = pd.DataFrame(movies, columns=["title", "location", "cast"])

# Text preprocessing (clean, tokenize, etc.)
# ... (implement text preprocessing steps)

# Feature engineering (combine text features)
df["combined_features"] = df["location"] + " " + df["cast"].str.join(" ") + " "  # ... (combine other features)

# TF-IDF vectorization
tfidf = TfidfVectorizer()
try:
    tfidf_matrix = tfidf.fit_transform(df["combined_features"])
except ValueError as exc:
    # fit_transform raises ValueError when the corpus yields no vocabulary
    # (no movies scraped, or no usable tokens in the combined text).
    print(f"TF-IDF vectorization skipped: {exc}")
    tfidf_matrix = None
    # Cosine similarity calculation (no text to compare: all similarities are zero)
    cosine_sim = np.zeros((len(df), len(df)))
else:
    # Cosine similarity calculation
    cosine_sim = cosine_similarity(tfidf_matrix)

# Function to get movie recommendations
def get_recommendations(title, cosine_sim=cosine_sim, top_n=10):
    """Return the titles of the movies most similar to ``title``.

    Parameters
    ----------
    title : str
        Exact value to look up in ``df["title"]``. When several rows share the
        title, the first one is used.
    cosine_sim : 2-D array-like
        Pairwise similarity matrix whose row/column order matches the row
        order of ``df``. Defaults to the matrix computed when this function
        was defined.
    top_n : int, default 10
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.Series
        Up to ``top_n`` titles from ``df``, most similar first. The queried
        movie itself is never included.

    Raises
    ------
    IndexError
        If ``title`` does not occur in ``df["title"]``.
    """
    # Work with row positions rather than index labels: cosine_sim is a plain
    # array and .iloc below is positional, so labels would only be correct
    # for a default RangeIndex.
    positions = np.flatnonzero((df["title"] == title).to_numpy())
    if positions.size == 0:
        raise IndexError(f"Movie title {title!r} was not found in the data set")
    idx = int(positions[0])

    # Exclude the queried movie explicitly. Dropping "the first sorted entry"
    # is wrong when another movie ties with it or when its own feature vector
    # is all zeros (self-similarity 0).
    sim_scores = [(i, score) for i, score in enumerate(cosine_sim[idx]) if i != idx]
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)

    movie_indices = [i for i, _ in sim_scores[:top_n]]
    return df["title"].iloc[movie_indices]

# Example usage
example_title = "Your Favorite Oscar Movie"  # replace with a title present in df["title"]
try:
    recommendations = get_recommendations(example_title)
except IndexError:
    # get_recommendations raises IndexError when the title is not in df.
    recommendations = pd.Series(dtype=object, name="title")
    print(f"No recommendations: {example_title!r} is not in the scraped data set.")
else:
    print(recommendations)

# Generate the "super hit" movie (this is highly speculative and creative)
# ... (use the analyzed data to create a new movie concept)




ConnectionError: HTTPSConnectionPool(host='www.example-oscar-data.com', port=443): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x7f7d3d415bd0>: Failed to resolve 'www.example-oscar-data.com' ([Errno -2] Name or service not known)"))

In [None]:
# prompt: write a programme which analyses movie and film reviews in Bollywood including commercial blockbusters. It then generates using machine learning and ai what the next blockbuster and critical hit movie will be.

import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
nltk.download('vader_lexicon')

# Data Collection and Preprocessing

# Scrape reviews from various sources (e.g., IMDB, Rotten Tomatoes, Bollywood Hungama)
def scrape_reviews(url, timeout=10):
  """Download ``url`` and return the text of every review element on the page.

  Args:
    url: Address of the review page to fetch.
    timeout: Seconds to wait for the server before giving up. Without a
      timeout, requests can block forever on an unresponsive host.

  Returns:
    A list with the text of each ``<div class="review-text">`` element, in
    document order. The list is empty when the page has no such elements or
    when the server answers with an HTTP error status (a warning is emitted
    in that case).

  Raises:
    requests.RequestException: On connection failures and timeouts.
  """
  import warnings  # local import: only needed on the HTTP-error path

  response = requests.get(url, timeout=timeout)
  if not response.ok:
    # An error page contains no reviews; report it instead of silently
    # parsing it as "this movie has no reviews".
    warnings.warn(f"GET {url} returned HTTP {response.status_code}; no reviews scraped")
    return []
  soup = BeautifulSoup(response.content, "html.parser")
  reviews = [review.text for review in soup.find_all("div", class_="review-text")] # Adapt class name as needed
  return reviews

imdb_reviews = scrape_reviews("https://www.imdb.com/title/tt1234567/reviews") # Replace with actual movie URLs
rotten_tomatoes_reviews = scrape_reviews("https://www.rottentomatoes.com/m/your_movie_title/reviews")
# ... (add more sources)

# Combine reviews into a single DataFrame
# Map each source name to the reviews scraped from it; register additional
# sources in this dictionary. Do not use a literal `...` as a placeholder
# inside an expression: it is Python's Ellipsis object, and `list + ...`
# raises "TypeError: can only concatenate list".
reviews_by_source = {
  "IMDB": imdb_reviews,
  "Rotten Tomatoes": rotten_tomatoes_reviews,
}
reviews_df = pd.DataFrame(
  [
    (source, review)
    for source, source_reviews in reviews_by_source.items()
    for review in source_reviews
  ],
  # Explicit columns keep the frame usable when no reviews were scraped.
  columns=["source", "review"],
)

# Clean and preprocess text data
# ... (remove punctuation, lowercase, handle special characters, etc.)

# Sentiment Analysis

# Perform sentiment analysis on reviews
sia = SentimentIntensityAnalyzer()
# astype(float) keeps the column numeric even when there are no rows.
reviews_df["sentiment_score"] = reviews_df["review"].apply(lambda x: sia.polarity_scores(x)["compound"]).astype(float)

# Feature Engineering

# Extract relevant features from reviews and movie data
# ... (e.g., keywords, themes, genres, cast, director, budget, box office performance)

# Combine features into a single representation
# ... (consider using TF-IDF or other embedding techniques)

# Model Building

# Train a machine learning model (e.g., regression, classification) to predict movie success
# ... (use features from reviews and movie data as input, and box office performance or critical acclaim as target)

# Super Hit Movie Generation

# Use the trained model to generate a "super hit" movie concept
# ... (optimize features based on model predictions)

# Example: Generate a movie title based on popular keywords
def generate_title(keywords, max_words=3):
  """Build a movie title from the most important keywords.

  Args:
    keywords: Iterable of keyword strings ordered from most to least
      important. A single string is treated as one keyword. ``None`` or an
      empty iterable is allowed.
    max_words: Maximum number of keywords used in the title.

  Returns:
    The first ``max_words`` non-blank keywords, each with its first letter
    upper-cased, joined by spaces. Falls back to the fixed placeholder
    "Super Hit Movie Title" when no usable keyword is given.
  """
  # ... (use a language model or other creative approach to generate a catchy title)
  if keywords is None:
    keywords = []
  elif isinstance(keywords, str):
    # Iterating over a bare string would yield single characters.
    keywords = [keywords]

  words = [str(keyword).strip() for keyword in keywords]
  words = [word for word in words if word][:max_words]
  if not words:
    return "Super Hit Movie Title"
  # Upper-case only the first letter; str.title() would mangle apostrophes.
  return " ".join(word[0].upper() + word[1:] for word in words)

# Derive the popular keywords from the collected reviews: the terms with the
# highest mean TF-IDF weight across all reviews.
TOP_KEYWORD_COUNT = 5
popular_keywords = []
review_texts = reviews_df["review"].dropna().astype(str)
try:
  keyword_vectorizer = TfidfVectorizer(stop_words="english")
  keyword_matrix = keyword_vectorizer.fit_transform(review_texts)
except ValueError as exc:
  # Raised when there are no reviews or they contain only stop words.
  print(f"No keywords could be extracted from the reviews: {exc}")
else:
  mean_weights = np.asarray(keyword_matrix.mean(axis=0)).ravel()
  top_positions = mean_weights.argsort()[::-1][:TOP_KEYWORD_COUNT]
  vocabulary = keyword_vectorizer.get_feature_names_out()
  popular_keywords = [vocabulary[position] for position in top_positions]

# Print the generated movie concept
print("Super Hit Movie Concept:")
print("Title:", generate_title(popular_keywords))
# ... (print other generated features)


[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


TypeError: can only concatenate list (not "ellipsis") to list

In [None]:
# prompt: write a programme which takes movie ratings from the internet and generates a formula for a critical superhit and commercially successful movie. Use the parameters under which the movies were made from the internet.

import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# install libraries
!pip install beautifulsoup4
!pip install requests

nltk.download('vader_lexicon')

# --- Data Collection and Preprocessing ---

# Function to scrape movie data (adapt for different sources)
def scrape_movie_data(url, timeout=10):
    """Download ``url`` and extract one record per movie listed on the page.

    Parameters
    ----------
    url : str
        Address of the listing page to fetch.
    timeout : float, default 10
        Seconds to wait for the server before giving up. Without a timeout,
        requests can block forever on an unresponsive host.

    Returns
    -------
    list of dict
        One dictionary per ``<div class="movie-info">`` element that has an
        ``<h2>`` title, with the keys ``"title"``, ``"rating"`` and
        ``"genre"``. Values are whitespace-stripped strings; ``"rating"`` and
        ``"genre"`` are ``None`` when the corresponding element is missing.
        The list is empty when the server answers with an HTTP error status
        (a warning is emitted in that case).

    Raises
    ------
    requests.RequestException
        On connection failures and timeouts.
    """
    import warnings  # local import: only needed on the HTTP-error path

    def _text_or_none(tag):
        """Stripped text of a parsed tag, or None when the tag is absent."""
        return tag.text.strip() if tag is not None else None

    response = requests.get(url, timeout=timeout)
    if not response.ok:
        # An error page contains no movie data; report it instead of silently
        # parsing it as an empty listing.
        warnings.warn(f"GET {url} returned HTTP {response.status_code}; no movies scraped")
        return []

    soup = BeautifulSoup(response.content, "html.parser")
    movies = []
    for movie_data in soup.find_all("div", class_="movie-info"):  # Adapt class name as needed
        title = _text_or_none(movie_data.find("h2"))
        if title is None:
            # A record without a title cannot be identified; skip it rather
            # than fail on the missing element.
            continue
        rating = _text_or_none(movie_data.find("span", class_="rating"))  # Adapt class name as needed
        genre = _text_or_none(movie_data.find("span", class_="genre"))  # Adapt class name as needed
        # ... (extract other features: director, cast, budget, box office, etc.)
        movies.append({
            "title": title,
            "rating": rating,
            "genre": genre,
            # ... (other features)
        })
    return movies

# Scrape data from multiple sources (replace with actual URLs)
imdb_movies = scrape_movie_data("https://www.imdb.com/chart/top/")
rotten_tomatoes_movies = scrape_movie_data("https://www.rottentomatoes.com/top/bestofrt/")
# ... (add more sources)

# Combine movie data into a single DataFrame
movies_df = pd.DataFrame(imdb_movies + rotten_tomatoes_movies)

# When nothing was scraped the frame has no columns at all, and the column
# look-ups below fail with KeyError: 'rating'. Make sure the columns the rest
# of the analysis relies on always exist.
for required_column in ("title", "rating", "genre"):
    if required_column not in movies_df.columns:
        movies_df[required_column] = pd.Series(dtype=object)

# Clean and preprocess data
# NOTE: values that are not plain numbers (e.g. "8.5/10", "95%") become NaN here.
movies_df["rating"] = pd.to_numeric(movies_df["rating"], errors='coerce')  # Convert ratings to numeric
# ... (handle missing values, clean text features, etc.)

# --- Sentiment Analysis ---

# Scrape reviews (adapt for different sources)
def scrape_reviews(url, timeout=10):
    """Download ``url`` and return the text of every review element on the page.

    Parameters
    ----------
    url : str
        Address of the review page to fetch.
    timeout : float, default 10
        Seconds to wait for the server before giving up. Without a timeout,
        requests can block forever on an unresponsive host.

    Returns
    -------
    list of str
        Text of each ``<div class="review-text">`` element, in document
        order. Empty when the page has no such elements or when the server
        answers with an HTTP error status (a warning is emitted in that case).

    Raises
    ------
    requests.RequestException
        On connection failures and timeouts.
    """
    import warnings  # local import: only needed on the HTTP-error path

    response = requests.get(url, timeout=timeout)
    if not response.ok:
        # An error page contains no reviews; report it instead of silently
        # parsing it as "this movie has no reviews".
        warnings.warn(f"GET {url} returned HTTP {response.status_code}; no reviews scraped")
        return []
    soup = BeautifulSoup(response.content, "html.parser")
    reviews = [review.text for review in soup.find_all("div", class_="review-text")]  # Adapt class name as needed
    return reviews

# Get reviews for each movie (this might be time-consuming)
from urllib.parse import quote  # used only to build the per-movie review URLs below

# Percent-encode the title before placing it in the URL path: characters such
# as "/", "?" or "#" in a title would otherwise change the meaning of the URL.
movies_df["reviews"] = movies_df["title"].apply(
    lambda title: scrape_reviews("https://www.example.com/reviews/" + quote(str(title), safe=""))
)

# Perform sentiment analysis
sia = SentimentIntensityAnalyzer()


def _mean_compound_score(reviews):
    """Mean VADER compound score of ``reviews``; NaN when there are no reviews."""
    if not reviews:
        # np.mean([]) would also give nan, but with a RuntimeWarning per movie.
        return np.nan
    return np.mean([sia.polarity_scores(review)["compound"] for review in reviews])


movies_df["sentiment_score"] = movies_df["reviews"].apply(_mean_compound_score).astype(float)

# --- Feature Engineering ---

# Extract relevant features (example: one-hot encode genres)
# dtype=float gives 0.0/1.0 columns that can be fed directly to the regression.
genre_dummies = pd.get_dummies(movies_df["genre"], prefix="genre", dtype=float)
movies_df = pd.concat([movies_df, genre_dummies], axis=1)

# ... (engineer other features: cast popularity, director success, etc.)

# --- Model Building ---

# Select features and target variable
# Every entry must be a column name. A literal `...` placeholder inside this
# list is Python's Ellipsis object and makes the column selection below fail.
features = ["sentiment_score", "genre_Action", "genre_Comedy"]  # Choose relevant features
target = "rating"

# A genre that never occurs in the scraped data has no dummy column; add it as
# an all-zero column so that selecting it does not raise KeyError.
for feature in features:
    if feature.startswith("genre_") and feature not in movies_df.columns:
        movies_df[feature] = 0.0

# LinearRegression cannot handle NaN, so keep only fully populated rows
# (movies without reviews have a NaN sentiment score, unparsable ratings are NaN).
model_df = movies_df.dropna(subset=features + [target])
X = model_df[features].astype(float)
y = model_df[target]

# Example: Generate a hypothetical super hit movie concept
super_hit_features = {
    "sentiment_score": 0.9,  # Very positive sentiment
    "genre_Action": 1,      # Action genre
    "genre_Comedy": 0,      # Not a comedy
    # ... (set other features based on model insights)
}

# With fewer rows than this, the train/test split below is either impossible
# or leaves too little data for the fit to be meaningful.
MIN_MOVIES_FOR_MODEL = 5

if len(model_df) < MIN_MOVIES_FOR_MODEL:
    model = None
    print(
        f"Only {len(model_df)} movies have complete data; at least "
        f"{MIN_MOVIES_FOR_MODEL} are needed to train the model. Skipping model building."
    )
else:
    # Split data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train a linear regression model
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Evaluate model performance
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print("Mean Squared Error:", mse)

    # --- Super Hit Formula Generation ---

    # Print coefficients of the model (interpret these to understand feature importance)
    print("Coefficients:")
    for feature, coef in zip(features, model.coef_):
        print(f"{feature}: {coef}")

    # Predict the rating for the super hit concept
    # The model expects a 2-D table with the training columns in the training
    # order; a list containing a dict is not valid input. Features missing
    # from the concept default to 0.
    super_hit_row = pd.DataFrame([super_hit_features]).reindex(columns=features, fill_value=0.0)
    super_hit_rating = model.predict(super_hit_row)[0]
    print("Predicted Rating for Super Hit Movie:", super_hit_rating)

# --- Conclusion ---

# This code provides a framework for analyzing movie data and generating a "formula" for success.
# The actual formula will depend on the specific data, features, and model used.
# Remember that movie success is complex and influenced by many factors beyond this simplified analysis.




[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


KeyError: 'rating'