In [18]:
import pandas as pd

# Load the ratings from the csv file
df_ratings = pd.read_csv('movie_ratings.csv')

# Show the dataset
print("Movie Ratings Dataset:")
print(df_ratings)


Movie Ratings Dataset:
     User  American Sniper  Edge of Tomorrow  Groundhog Day  Jurassic World  \
0   Bimal                5                 4              3             3.0   
1  Bharat                4                 3              3             NaN   
2  Sabina                4                 4              4             4.0   
3  Roshan                3                 3              2             2.0   
4    Puja                5                 4              4             NaN   

   Lost in Translation  Lucy  
0                  NaN   4.0  
1                  4.0   NaN  
2                  4.0   4.0  
3                  3.0   3.0  
4                  NaN   NaN  


In [19]:
# Show the average ratings for each user and each movie
user_avg_ratings = df_ratings.set_index("User").mean(axis=1)  # Average rating per user
movie_avg_ratings = df_ratings.set_index("User").mean(axis=0)  # Average rating per movie

# Display the average ratings
print("\nAverage Ratings per User:")
print(user_avg_ratings)

print("\nAverage Ratings per Movie:")
print(movie_avg_ratings)


Average Ratings per User:
User
Bimal     3.800000
Bharat    3.500000
Sabina    4.000000
Roshan    2.666667
Puja      4.333333
dtype: float64

Average Ratings per Movie:
American Sniper        4.200000
Edge of Tomorrow       3.600000
Groundhog Day          3.200000
Jurassic World         3.000000
Lost in Translation    3.666667
Lucy                   3.666667
dtype: float64


In [20]:
# Normalize ratings for each user (Min-Max normalization)
df_normalized = df_ratings.set_index("User").apply(lambda x: (x - x.min()) / (x.max() - x.min()), axis=1)

# Display normalized ratings
print("\nNormalized Ratings (Min-Max Scaling):")
print(df_normalized)



Normalized Ratings (Min-Max Scaling):
        American Sniper  Edge of Tomorrow  Groundhog Day  Jurassic World  \
User                                                                       
Bimal               1.0               0.5            0.0             0.0   
Bharat              1.0               0.0            0.0             NaN   
Sabina              NaN               NaN            NaN             NaN   
Roshan              1.0               1.0            0.0             0.0   
Puja                1.0               0.0            0.0             NaN   

        Lost in Translation  Lucy  
User                               
Bimal                   NaN   0.5  
Bharat                  1.0   NaN  
Sabina                  NaN   NaN  
Roshan                  1.0   1.0  
Puja                    NaN   NaN  


In [21]:
# Standardize ratings for each user (Z-score standardization)
df_standardized = df_ratings.set_index("User").apply(lambda x: (x - x.mean()) / x.std(), axis=1)

# Display standardized ratings
print("\nStandardized Ratings (Z-score Scaling):")
print(df_standardized)


Standardized Ratings (Z-score Scaling):
        American Sniper  Edge of Tomorrow  Groundhog Day  Jurassic World  \
User                                                                       
Bimal          1.434274          0.239046      -0.956183       -0.956183   
Bharat         0.866025         -0.866025      -0.866025             NaN   
Sabina              NaN               NaN            NaN             NaN   
Roshan         0.645497          0.645497      -1.290994       -1.290994   
Puja           1.154701         -0.577350      -0.577350             NaN   

        Lost in Translation      Lucy  
User                                   
Bimal                   NaN  0.239046  
Bharat             0.866025       NaN  
Sabina                  NaN       NaN  
Roshan             0.645497  0.645497  
Puja                    NaN       NaN  


# User Average Ratings  
The average rating each user gave across all movies.

# Movie Average Ratings  
The average rating each movie received across all users.

# Normalized Ratings  
Min-Max normalization applied per user, where ratings are scaled between 0 and 1.

# Standardized Ratings  
Z-score standardization applied per user, where ratings are adjusted based on their mean and standard deviation.

## Conclusion on Normalized Ratings

### Advantages:
- Allows fair comparison of user ratings since different users may have different rating tendencies (some may rate generously, others more critically).
- Useful when implementing recommendation algorithms that require standardized inputs.

### Disadvantages:
- Normalization can distort the original rating scale, making it harder to interpret the real sentiment behind the ratings.
- If a user has rated only a small number of movies, normalization can amplify small differences disproportionately.
