In [11]:
import wikipedia
import time

class WikiSearch:
    def __init__(self) -> None:
        self.terms = ['actor', 'actress', 'director']
        self.logs = []
        self.cache = {}
        self.count = {
            "cache_hit": 0,
            "cache_miss": 0,
            "total_time": 0, # for cache_miss
            "exception_count": 0, # for exception
            "exception_time": 0 # exception total time
        }
    
    # def __del__(self):
    #     self.print_stats()
    
    def print_stats(self):
        print("---------------WikiSearch stats:---------------")
        for k, v in self.count.items():
            print(f"{k}: {v}")

    def check_validity(self, name, content):
        for n in name.lower().split():
            if content.lower().find(n) != -1:
                return True
        return False

    def most_frequent_term(self, content, terms) -> list:
        count_map = {term: content.lower().count(term) for term in terms}
        counts = sorted(count_map.items(), key=lambda x: x[1], reverse=True)
        max_count = counts[0][1]
        
        if max_count > 0:
            most_frequent = [term for term, count in counts if count == max_count]
        else:
            most_frequent = []
        return most_frequent

    def possible_job_list(self, name) -> list:
        try:
            start = time.time()
            if name in self.cache:
                self.count["cache_hit"] += 1
                self.count["total_time"] += time.time() - start
                return self.cache[name]

            search_results = wikipedia.search(name)
            print(search_results[:3])
            if not search_results:
                self.logs.append(f"- wikipedia: No search results found for '{name}'")
                self.cache[name] = []
                self.countexception_count += 1
                self.count["exception_time"] += time.time() - start
                return []
            
            content = ""
            for res in search_results[:3]:
                content = wikipedia.summary(res, sentences=5, auto_suggest=False)
                if self.check_validity(name, content):
                    break
            
            if not content:
                self.cache[name] = []
                self.count["cache_miss"] += 1
                self.count["total_time"] += time.time() - start
                return []
            res = self.most_frequent_term(content, self.terms)
            self.cache[name] = res
            self.count["cache_miss"] += 1
            self.count["total_time"] += time.time() - start
            return res
        except wikipedia.exceptions.DisambiguationError as e:
            self.logs.append(f"- wikipedia: Ambiguous. Possible matches include: {e.options} for '{name}'")
            self.count["exception_count"] += 1
            self.count["exception_time"] += time.time() - start
            return []
        except wikipedia.exceptions.PageError:
            self.logs.append(f"- wikipedia: The page does not exist: '{name}'")
            self.count["exception_count"] += 1
            self.count["exception_time"] += time.time() - start
            return []
        except Exception as e:
            self.logs.append(f"- wikipedia: {e}")
            self.count["exception_count"] += 1
            self.count["exception_time"] += time.time() - start
            return []
    
    def write_logs(self):
        if self.logs:
            with open("wiki_search_logs.txt", "w") as f:
                f.write("\n".join(self.logs))

In [12]:
names = ["Lee Ang", "ah Lee Ang", "Ang Lee", "Booo Ang Lee", "Lee Ang", "Ang Lee", "Boooooooo Ang Lee", "Boooooooo Ang Lee"]
wiki = WikiSearch()
for name in names:
    result = wiki.possible_job_list(name)
    print(f"'{name}' is likely an actor/actress or director: {result}")
wiki.print_stats()

['Ang Lee', 'Hulk (film)', 'Michelle Ang']
'Lee Ang' is likely an actor/actress or director: ['director']
['Lee Ji-ah', 'Lee Chung-ah', 'Lee Ah-hyun']
'ah Lee Ang' is likely an actor/actress or director: ['actress']
['Ang Lee', 'Hulk (film)', 'Life of Pi (film)']
'Ang Lee' is likely an actor/actress or director: ['director']
[]
'Booo Ang Lee' is likely an actor/actress or director: []
'Lee Ang' is likely an actor/actress or director: ['director']
'Ang Lee' is likely an actor/actress or director: ['director']
[]
'Boooooooo Ang Lee' is likely an actor/actress or director: []
'Boooooooo Ang Lee' is likely an actor/actress or director: []
---------------WikiSearch stats:---------------
cache_hit: 3
cache_miss: 3
total_time: 0.0001697540283203125
exception_count: 2
exception_time: 1.621246337890625e-05


In [10]:
print(type(wikipedia.page("Maggie Smith")))
# print(wikipedia.summary("Maggie Smith", sentences=2))

<class 'wikipedia.wikipedia.WikipediaPage'>


In [4]:
from imdb import Cinemagoer
import json

# create an instance of the Cinemagoer class
ia = Cinemagoer()

people = ia.search_person("Bryn Cranston")
first_person = people[0]
person = ia.get_person(first_person.personID)

print(person.get('name'))

def print_filmography(person):
    filmography = person.get('filmography', {})
    
    for category, movies in filmography.items():
        print(f"\n{category.upper()}:\n{'-' * 40}")
        for movie in movies:
            title = movie.get('title')
            year = movie.get('year')
            if title and year:
                print(f"{title} ({year})")
            elif title:
                print(title)

print_filmography(person)

# def is_actor_or_actress(person):
#     # Check the filmography
#     filmography = person.get('filmography', {})
    
#     print(json.dumps(filmography, indent=4))
    
#     # Check if they have roles in "actor" or "actress" categories
#     if 'actor' in filmography or 'actress' in filmography:
#         return True
#     return False

# result = is_actor_or_actress(person)
# print(f"{person['name']} is an actor/actress: {result}")

Bryan Cranston

ACTOR:
----------------------------------------
Everything's Going to Be Great
Argylle (2024)
Jackpot
It's Always Sunny in Philadelphia (2023)
Asteroid City (2023)
Your Honor (2020)
PopCorners: Breaking Bad Super Bowl Commercial - Extended Version (2023)
Better Call Saul (2022)
Experience Yosemite (2022)
Jerry and Marge Go Large (2022)
The Stand (2020)
The One and Only Ivan (2020)
Home Movie: The Princess Bride (2020)
Family Guy (2006)
Mountain Dew Zero Sugar: As Good as the Original (2020)
El Camino: A Breaking Bad Movie (2019)
SuperMansion (2015)
Last Week Tonight with John Oliver (2019)
Robot Chicken (2011)
Isle of Dogs (2018)
Electric Dreams (2017)
Curb Your Enthusiasm (2017)
Last Flag Flying (2017)
The Disaster Artist (2017)
The Upside (2017)
Power Rangers (2017)
Sneaky Pete (2015)
Why Him? (2016)
Saturday Night Live (2016)
In Dubious Battle (2016)
Wakefield (2016)
The Infiltrator (2016)
All the Way (2016)
Get a Job (2016)
Zendaya: Neverland (2016)
Kung Fu Panda 3 

In [5]:
from imdb import Cinemagoer
import json

# create an instance of the Cinemagoer class
ia = Cinemagoer()

def print_movie_info(movie_name):
    # Search for the movie
    movies = ia.search_movie(movie_name)
    if not movies:
        print(f"No movie found with the name '{movie_name}'.")
        return

    # Get detailed information about the first matching movie
    movie = ia.get_movie(movies[0].movieID)
    
    # Print details
    title = movie.get('title')
    year = movie.get('year')
    directors = ", ".join([person['name'] for person in movie.get('directors', [])])
    cast = ", ".join([person['name'] for person in movie.get('cast', [])])  # Top 5 actors
    plot = movie.get('plot outline')
    rating = movie.get('rating')
    genres = ", ".join(movie.get('genres', []))
    
    print(f"Title: {title}")
    print(f"Year: {year}")
    print(f"Directors: {directors}")
    print(f"Top Cast: {cast}")
    print(f"Plot Outline: {plot}")
    print(f"Rating: {rating}")
    print(f"Genres: {genres}")

# Example usage
movie_name = "Hatfields McCoys"
print_movie_info(movie_name)

Title: Hatfields & McCoys
Year: 2012
Directors: 
Top Cast: Kevin Costner, Bill Paxton, Matt Barr, Tom Berenger, Powers Boothe, Andrew Howard, Jena Malone, Sarah Parish, Lindsay Pulsipher, Ronan Vibert, Joe Absolom, Noel Fisher, Boyd Holbrook, Tom McKay, Sam Reid, Mare Winningham, Greg Patmore, Max Deacon, John Bell, Nick Dunning, Katie Griffiths, Joy McBrinn, Adam Jones, Darrell Fetty, Jon Bloch, Bill Millsap, Michael Woods, Park Chisolm, Stacha Hicks, Levi Bowling, Jilon VanOver, Damian O'Hare, Andy Gathergood, Christopher Hatherall, Noah Taylor, Ben Cartwright, Jack Laskey, Jonathan Fredrick, Michael Jibson, Chad Hugghins, Tyler Jackson, Alixandra Fuchs, Lloyd Hutchinson, Michael Greco, Rebecca Calder, Serbanescu Florin, Constantin Viscreanu, Ionut Alexandru, James Carroll Jordan, Rob Moran
Plot Outline: The story of the feud that nearly launched a war between Kentucky and West Virginia. Devil Anse Hatfield and Randall McCoy were close friends until near the end of the Civil War, whe