In [12]:
#Representing the Dataset
#A list of dictionaries keeps all attributes of one entity together,
#which reduces misalignment errors and makes filtering, grouping, and analysis easier.
#Each dictionary represents a single data point (row), similar to how real datasets are
#structured in tables, while parallel lists can easily get out of sync and are
#harder to reason about.
# Column data, kept as parallel lists only long enough to be zipped into rows.
film = ['Iron Man', 'Batman', 'Thor', 'Joker', 'Avengers', 'Logan']
genre = ['Action', 'Action', 'Fantasy', 'Drama', 'Action', 'Drama']
rating = [8.5, 8.2, 7.9, 9.0, 8.8, 8.6]
views = [120, 110, 95, 140, 160, 100]

# One dict per movie: position i of every column list becomes row i.
movies = [
    {"movie": title, "genre": kind, "rating": score, "views": view_count}
    for title, kind, score, view_count in zip(film, genre, rating, views)
]

# Show each row on its own line.
print(*movies, sep="\n")

{'movie': 'Iron Man', 'genre': 'Action', 'rating': 8.5, 'views': 120}
{'movie': 'Batman', 'genre': 'Action', 'rating': 8.2, 'views': 110}
{'movie': 'Thor', 'genre': 'Fantasy', 'rating': 7.9, 'views': 95}
{'movie': 'Joker', 'genre': 'Drama', 'rating': 9.0, 'views': 140}
{'movie': 'Avengers', 'genre': 'Action', 'rating': 8.8, 'views': 160}
{'movie': 'Logan', 'genre': 'Drama', 'rating': 8.6, 'views': 100}


In [14]:
#Filtering Rows
# | My code                       | Pandas equivalent (later) |
# | ----------------------------- | ------------------------- |
# | list of dicts                 | DataFrame                 |
# | `for movieRow in movies`      | row-wise iteration        |
# | `if movieRow["views"] >= 120` | boolean filtering         |
# | `popular_movies`              | filtered dataset          |

# Keep only the rows with at least 120 views (the threshold is inclusive).
popular_movies = [row for row in movies if row["views"] >= 120]

# Print the surviving rows, one per line.
for row in popular_movies:
    print(row)

{'movie': 'Iron Man', 'genre': 'Action', 'rating': 8.5, 'views': 120}
{'movie': 'Joker', 'genre': 'Drama', 'rating': 9.0, 'views': 140}
{'movie': 'Avengers', 'genre': 'Action', 'rating': 8.8, 'views': 160}


In [20]:
#Grouping by genre
# Map each genre to the list of movie titles in that genre, in dataset order.
genre_groups = {}
for movieRow in movies:
    # setdefault creates the empty list the first time a genre is seen.
    genre_groups.setdefault(movieRow["genre"], []).append(movieRow["movie"])

# The loop variable is deliberately not called `genre`: that name already holds
# the list of genres built earlier in the notebook, and a `for genre in ...`
# loop would leave it rebound to a single string after this cell runs.
for genre_name, titles in genre_groups.items():
    print(f"{genre_name} : {titles}")

Action : ['Iron Man', 'Batman', 'Avengers']
Fantasy : ['Thor']
Drama : ['Joker', 'Logan']


In [22]:
#Insight Reasoning
# Total views per genre, accumulated over every movie row.
genre_views = {}
for movieRow in movies:
    # get(..., 0) starts a genre's running total at zero the first time it appears.
    genre_views[movieRow["genre"]] = genre_views.get(movieRow["genre"], 0) + movieRow["views"]

# The loop variable is deliberately not called `genre`: that name already holds
# the list of genres built earlier in the notebook, and a `for genre in ...`
# loop would leave it rebound to a single string after this cell runs.
for genre_name, total_views in genre_views.items():
    print(f"{genre_name} : {total_views}")

# Written interpretation of the totals printed above, including why totals
# alone are not a fair basis for comparing genres.
print("Action appears to be the most popular genre based on total views. However, this conclusion may be misleading because Action has more movies than other genres, which inflates its total view count. Additionally, the dataset is small, and a few high-view movies can disproportionately affect the results. Average views per movie or a larger dataset would be needed for a fair comparison.")

Action : 390
Fantasy : 95
Drama : 240
Action appears to be the most popular genre based on total views. However, this conclusion may be misleading because Action has more movies than other genres, which inflates its total view count. Additionally, the dataset is small, and a few high-view movies can disproportionately affect the results. Average views per movie or a larger dataset would be needed for a fair comparison.
