# __Select users for analysing CF models' prediction results__

##### Initial setup

In [None]:
%load_ext watermark
%load_ext autoreload

In [None]:
%autoreload 2
%watermark -v -n -m -p numpy,pandas,seaborn,matplotlib

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import sys
import os
from pathlib import Path
from pandas import option_context
sys.path.append('../../../../')
from src.settings import DATA_DIR, PROJECT_DIR

# Absolute path of the parent of the current working directory.
CURRENT_PATH = os.path.abspath(os.pardir)
print(CURRENT_PATH)
print(DATA_DIR)

In [None]:
import logging

logging.basicConfig(level=logging.INFO)

In [None]:
from src.data_processing.dataframe_utils import (prepare_summary_table, start_pipeline, reset_index, 
                                                drop_unnecessary_cols, expand_column, unpivot_dataframe,
                                                sort_values, remove_nan, rename_cols)

from src.data_processing.process.ratings_df_utils import (prepare_user_unrated_movies_table, 
                                                       prepare_user_rated_movies_table, prepare_user_rated_keywords,
                                                       merge_user_rating_with_movies, prepare_user_rated_genres)

from src.data_processing.visualization.plot_utils import plot_counts, change_bars_width, plot_bar

##### Load data

In [None]:
MERGED_DATA_PATH = os.path.join(DATA_DIR, 'processed', 'merged_ml25m_kaggle')
MERGED_DATA_PATH

In [None]:
# Load the merged movie metadata table from MERGED_DATA_PATH.
movies = pd.read_csv(os.path.join(MERGED_DATA_PATH, 'movies_merged.csv'))
# Load the merged ratings table with explicit 32-bit dtypes for every column
# (presumably to reduce memory use on a large table - TODO confirm).
# NOTE(review): 'timestamp' as int32 assumes every value fits in a signed
# 32-bit integer - verify against the data before reusing with newer ratings.
ratings = pd.read_csv(os.path.join(MERGED_DATA_PATH, 'ratings_merged.csv'), 
                      dtype={'userId': np.int32,
                             'movieId': np.int32,
                             'rating': np.float32,
                             'timestamp': np.int32})

In [None]:
USERS_ANALYSIS_DIR = os.path.join(PROJECT_DIR, 'reports/figures/sec2_data/usr_analysis')
USERS_ANALYSIS_DIR

### __Movies__

In [None]:
movies.head(2)

In [None]:
movies.info()
movies.describe()

### __Ratings__

In [None]:
ratings.head()

In [None]:
ratings.info()
ratings.describe()

In [None]:
ratings.userId.unique().shape

In [None]:
ratings.movieId.unique().shape

### __User rating summary__

In [None]:
# Per-user rating summary: one row per userId with the columns
# 'rate_amount' and 'rate_average' built from the 'rating' column.
# Written as nested calls; each helper receives the previous step's result
# as its first argument, exactly as DataFrame.pipe would pass it.
user_rating_summary = reset_index(
    prepare_summary_table(
        start_pipeline(ratings),
        group_cols=['userId'],
        aggr_col='rating',
        col_1='rate_amount',
        col_2='rate_average',
    )
)

In [None]:
user_rating_summary.head()

In [None]:
user_rating_summary.info()
user_rating_summary.describe()

### __Choose sample users to recommend movies__

In [None]:
user_rating_summary[user_rating_summary['rate_amount'] > 1000]

In [None]:
user_rating_summary.iloc[user_rating_summary['rate_amount'].argmax(), :]

In [None]:
user_rating_summary.iloc[user_rating_summary['rate_amount'].argmin(), :]

In [None]:
user_rating_summary[user_rating_summary['rate_amount'] == 405].head()

In [None]:
user_rating_summary.iloc[user_rating_summary['rate_average'].argmax(), :]

In [None]:
user_rating_summary.iloc[user_rating_summary['rate_average'].argmin(), :]

In [None]:
# Attach the title and release date of each rated movie to its rating row.
movie_columns = ['movieId', 'title', 'release_date']
rate_movies = ratings.merge(movies[movie_columns], on='movieId')

In [None]:
rate_movies.head(2)

In [None]:
# Keep only the ratings given to a hand-picked set of well-known titles.
familiar_movies = rate_movies.loc[
    rate_movies['title'].isin([
        'Iron Man',
        'Iron Man 2',
        'Iron Man 3',
        'Pulp Fiction',
        'The Shawshank Redemption',
        'Forrest Gump',
        'The Lord of the Rings: The Fellowship of the Ring',
    ])
]

In [None]:
highly_rated_familiar_movies = familiar_movies[familiar_movies['rating'] == 5.0]

In [None]:
highly_rated_familiar_movies[highly_rated_familiar_movies['userId'] == 162349]

In [None]:
user_rating_summary[user_rating_summary['userId'] == 162349]

#### __Selected user ids__

162349 - user who gave the highest rating (5.0) to the selected familiar movies: 'Iron Man', 'Iron Man 2', 'Iron Man 3', 'Pulp Fiction', 'The Shawshank Redemption', 'Forrest Gump', 'The Lord of the Rings: The Fellowship of the Ring';
rate_amount=674, rate_avg=4.074184

72315 - with maximum rate amount (15609), rate_avg=3.18989

542 - with minimum rate amount (151), rate_avg=3.582783

12949 - with mean rate amount (405) and mean rate average (3.558025)

12002 - with maximum rate average (5.0), rate_amount=601

131800 - with minimum rate average (0.50939), rate_amount=213

### __User 162349 analysis - highly rated familiar films: Iron Man, Shawshank, Pulp Fiction, etc.__

In [None]:
USER_162349_DIR = os.path.join(USERS_ANALYSIS_DIR, 'user_162349')
Path(USER_162349_DIR).mkdir(parents=True, exist_ok=True)

#### Prepare table of movies rated by user 162349

In [None]:
user_162349_rated_movies = (ratings
                           .pipe(prepare_user_rated_movies_table, 162349)
                           .pipe(merge_user_rating_with_movies, movies))

In [None]:
user_162349_rated_movies.head()

In [None]:
user_162349_rated_movies.info()
user_162349_rated_movies.describe()

In [None]:
user_162349_rated_movies.to_csv(os.path.join(USER_162349_DIR, 'usr162349_rated_movies.csv'), index=False)

#### Visualizations

##### Movies ratings count

In [None]:
rate_countplot_162349 = plot_counts(user_162349_rated_movies, count_col='rating', figsize=(6, 6),
                         title='Movies ratings count for user id {}'.format(162349))
rate_countplot_162349 = change_bars_width(rate_countplot_162349, 0.55)
rate_countplot_162349

In [None]:
fig = rate_countplot_162349.get_figure()
fig.savefig(os.path.join(USER_162349_DIR, 'usr162349_mv_rate_amount.png'), bbox_inches = "tight")

##### Genres rated

In [None]:
user_162349_rated_genres = (user_162349_rated_movies
                           .pipe(prepare_user_rated_genres))

In [None]:
user_162349_rated_genres.head()

In [None]:
genre_countplot_162349 = plot_counts(user_162349_rated_genres, count_col='genre', y_label='rate count',
                         title='Rate count by genre for user id {}'.format(162349), figsize=(14, 7), rotate=True)
genre_countplot_162349

In [None]:
fig = genre_countplot_162349.get_figure()
fig.savefig(os.path.join(USER_162349_DIR, 'usr162349_genres_rate_count.png'), bbox_inches = "tight")

In [None]:
user_162349_genres_rates_summary = (user_162349_rated_genres
                                     .pipe(prepare_summary_table, group_cols=['userId', 'genre'], 
                                       aggr_col='rating', col_1='rate_amount', col_2='rate_avg')
                                     .pipe(sort_values, sort_subset=['rate_avg'], ascending=False))

In [None]:
user_162349_genres_rates_summary.head()

In [None]:
user_162349_genres_rates_summary.info()

In [None]:
# Bar chart of the average rating per genre for user 162349.
genre_barplot_162349 = plot_bar(user_162349_genres_rates_summary, x='genre', y='rate_avg',
                      title='Ratings average by genre for user id {}'.format(162349), ann_format="{:.2f}", 
                      figsize=(14, 7), rotate=True, palette="Greens_d")
# Fix the y-axis to [0.0, 5.0]. The limit must be set on the object returned
# by plot_bar for this chart: the name `ax` is not bound until a later cell,
# so using it here raises NameError on a fresh run (or, after out-of-order
# execution, silently rescales a different user's chart).
genre_barplot_162349.set_ylim([0.0, 5.0])
genre_barplot_162349

In [None]:
fig = genre_barplot_162349.get_figure()
fig.savefig(os.path.join(USER_162349_DIR, 'usr162349_genres_rate_avg.png'), bbox_inches = "tight")

##### Keywords rated

In [None]:
user_162349_rated_keywords = (user_162349_rated_movies
                            .pipe(prepare_user_rated_keywords))

In [None]:
user_162349_rated_keywords.head()

In [None]:
user_162349_rated_keywords.info()

In [None]:
user_162349_keyword_rates_summary = (user_162349_rated_keywords
                                    .pipe(prepare_summary_table, group_cols=['userId', 'plot_keyword'], 
                                       aggr_col='rating', col_1='rate_amount', col_2='rate_avg')
                                    .pipe(sort_values, sort_subset=['rate_amount'], ascending=False)
                                    .pipe(reset_index))

In [None]:
user_162349_keyword_rates_summary.head()

In [None]:
user_162349_keyword_rates_summary.info()
user_162349_keyword_rates_summary.describe()

In [None]:
keywords_barplot_162349 = plot_bar(user_162349_keyword_rates_summary.head(20), x='plot_keyword', y='rate_amount', 
                          title='Most frequently rated keywords for user id {}'.format(162349), ann_format="{:.0f}",
                          figsize=(14, 7), rotate=True, palette="Oranges_d")
keywords_barplot_162349

In [None]:
fig = keywords_barplot_162349.get_figure()
fig.savefig(os.path.join(USER_162349_DIR, 'usr162349_keywords_rate_amount.png'), bbox_inches = "tight")

### __User 72315 analysis - max rate amount__

In [None]:
USER_72315_DIR = os.path.join(USERS_ANALYSIS_DIR, 'user_72315')
Path(USER_72315_DIR).mkdir(parents=True, exist_ok=True)

#### Prepare rated movies table

In [None]:
user_72315_rated_movies = (ratings
                           .pipe(prepare_user_rated_movies_table, 72315)
                           .pipe(merge_user_rating_with_movies, movies))

In [None]:
user_72315_rated_movies.head(2)

In [None]:
user_72315_rated_movies.info()
user_72315_rated_movies.describe()

In [None]:
user_72315_rated_movies.to_csv(os.path.join(USER_72315_DIR, 'usr72315_rated_movies.csv'), index=False)

#### Visualizations

##### Movies rating count

In [None]:
ax = plot_counts(user_72315_rated_movies, count_col='rating', 
                 title='Movies ratings count for user id {}'.format(72315))
ax

In [None]:
fig = ax.get_figure()
fig.savefig(os.path.join(USER_72315_DIR, 'usr72315_mv_rate_amount.png'), bbox_inches = "tight")

##### Genres rated

In [None]:
user_72315_rated_genres = (user_72315_rated_movies
                           .pipe(prepare_user_rated_genres))

In [None]:
user_72315_rated_genres.head()

In [None]:
ax = plot_counts(user_72315_rated_genres, count_col='genre', y_label='rate count',
                 title='Rate count by genre for user id {}'.format(72315), figsize=(14, 7), rotate=True)
ax

In [None]:
fig = ax.get_figure()
fig.savefig(os.path.join(USER_72315_DIR, 'usr72315_genre_rate_count.png'), bbox_inches = "tight")

In [None]:
user_72315_genres_rates_summary = (user_72315_rated_genres
                                 .pipe(prepare_summary_table, group_cols=['userId', 'genre'], 
                                       aggr_col='rating', col_1='rate_amount', col_2='rate_avg')
                                 .pipe(sort_values, sort_subset=['rate_avg'], ascending=False))

In [None]:
user_72315_genres_rates_summary.head()

In [None]:
user_72315_genres_rates_summary.info()
user_72315_genres_rates_summary.describe()

In [None]:
ax = plot_bar(user_72315_genres_rates_summary, x='genre', y='rate_avg', 
              title='Ratings average by genre for user id {}'.format(72315), ann_format="{:.2f}", 
              figsize=(14, 7), rotate=True, palette="Greens_d")
ax.set_ylim([0.0, 5.0])
ax

In [None]:
fig = ax.get_figure()
fig.savefig(os.path.join(USER_72315_DIR, 'usr72315_genre_rate_avg.png'), bbox_inches = "tight")

##### Rated keywords

In [None]:
user_72315_rated_keywords = (user_72315_rated_movies
                           .pipe(prepare_user_rated_keywords))

In [None]:
user_72315_rated_keywords.head()

In [None]:
user_72315_rated_keywords.info()

In [None]:
user_72315_keywords_rates_summary = (user_72315_rated_keywords
                                 .pipe(prepare_summary_table, group_cols=['userId', 'plot_keyword'], 
                                       aggr_col='rating', col_1='rate_amount', col_2='rate_avg')
                                 .pipe(sort_values, sort_subset=['rate_amount'], ascending=False))

In [None]:
user_72315_keywords_rates_summary.head()

In [None]:
user_72315_keywords_rates_summary.info()
user_72315_keywords_rates_summary.describe()

In [None]:
ax = plot_bar(user_72315_keywords_rates_summary.head(20), x='plot_keyword', y='rate_amount', 
              title='Most frequently rated keywords for user id {}'.format(72315), ann_format="{:.0f}",
              figsize=(14, 7), rotate=True, palette="Oranges_d")
ax

In [None]:
fig = ax.get_figure()
fig.savefig(os.path.join(USER_72315_DIR, 'usr72315_keywords_rate_amount.png'), bbox_inches = "tight")

### __User 542 analysis - min rate amount__

In [None]:
USER_542_DIR = os.path.join(USERS_ANALYSIS_DIR, 'user_542')
Path(USER_542_DIR).mkdir(parents=True, exist_ok=True)

#### Prepare rated movies table

In [None]:
user_542_rated_movies = (ratings
                           .pipe(prepare_user_rated_movies_table, 542)
                           .pipe(merge_user_rating_with_movies, movies))

In [None]:
user_542_rated_movies.head(2)

In [None]:
user_542_rated_movies.info()
user_542_rated_movies.describe()

In [None]:
user_542_rated_movies.to_csv(os.path.join(USER_542_DIR, 'usr542_rated_movies.csv'), index=False)

#### Visualizations

##### Movies rating count

In [None]:
ax = plot_counts(user_542_rated_movies, count_col='rating', 
                 title='Movies ratings count for user id {}'.format(542))
ax

In [None]:
fig = ax.get_figure()
fig.savefig(os.path.join(USER_542_DIR, 'usr542_mv_rate_amount.png'), bbox_inches = "tight")

##### Genres rated

In [None]:
user_542_rated_genres = (user_542_rated_movies
                        .pipe(prepare_user_rated_genres))

In [None]:
user_542_rated_genres.head()

In [None]:
ax = plot_counts(user_542_rated_genres, count_col='genre', y_label='rate count',
                 title='Rate count by genre for user id {}'.format(542), figsize=(14, 7), rotate=True)
ax

In [None]:
fig = ax.get_figure()
fig.savefig(os.path.join(USER_542_DIR, 'usr542_genre_rate_count.png'), bbox_inches = "tight")

In [None]:
user_542_genres_rates_summary = (user_542_rated_genres
                                 .pipe(prepare_summary_table, group_cols=['userId', 'genre'], 
                                       aggr_col='rating', col_1='rate_amount', col_2='rate_avg')
                                 .pipe(sort_values, sort_subset=['rate_avg'], ascending=False))

In [None]:
user_542_genres_rates_summary.head()

In [None]:
user_542_genres_rates_summary.info()
user_542_genres_rates_summary.describe()

In [None]:
ax = plot_bar(user_542_genres_rates_summary, x='genre', y='rate_avg', 
              title='Ratings average by genre for user id {}'.format(542), ann_format="{:.2f}", 
              figsize=(14, 7), rotate=True, palette="Greens_d")
ax.set_ylim([0.0, 5.0])
ax

In [None]:
fig = ax.get_figure()
fig.savefig(os.path.join(USER_542_DIR, 'usr542_genre_rate_avg.png'), bbox_inches = "tight")

##### __Keywords rated__

In [None]:
user_542_rated_keywords = (user_542_rated_movies
                           .pipe(prepare_user_rated_keywords))

In [None]:
user_542_rated_keywords.head()

In [None]:
user_542_rated_keywords.info()

In [None]:
user_542_keywords_rates_summary = (user_542_rated_keywords
                                 .pipe(prepare_summary_table, group_cols=['userId', 'plot_keyword'], 
                                       aggr_col='rating', col_1='rate_amount', col_2='rate_avg')
                                 .pipe(sort_values, sort_subset=['rate_amount'], ascending=False))

In [None]:
user_542_keywords_rates_summary.head()

In [None]:
user_542_keywords_rates_summary.info()
user_542_keywords_rates_summary.describe()

In [None]:
ax = plot_bar(user_542_keywords_rates_summary.head(20), x='plot_keyword', y='rate_amount', 
              title='Most frequently rated keywords for user id {}'.format(542), ann_format="{:.0f}",
              figsize=(14, 7), rotate=True, palette="Oranges_d")
ax

In [None]:
fig = ax.get_figure()
fig.savefig(os.path.join(USER_542_DIR, 'usr542_keywords_rate_amount.png'), bbox_inches = "tight")

### __User 12949 analysis - mean rate amount, mean rate avg__

In [None]:
USER_12949_DIR = os.path.join(USERS_ANALYSIS_DIR, 'user_12949')
Path(USER_12949_DIR).mkdir(parents=True, exist_ok=True)

#### __Prepare rated movies table__

In [None]:
user_12949_rated_movies = (ratings
                           .pipe(prepare_user_rated_movies_table, 12949)
                           .pipe(merge_user_rating_with_movies, movies))

In [None]:
user_12949_rated_movies.head(2)

In [None]:
user_12949_rated_movies.info()
user_12949_rated_movies.describe()

In [None]:
user_12949_rated_movies.to_csv(os.path.join(USER_12949_DIR, 'usr12949_rated_movies.csv'), index=False)

#### __Visualizations__

##### Movies ratings count

In [None]:
ax = plot_counts(user_12949_rated_movies, count_col='rating', 
                 title='Movies ratings count for user id {}'.format(12949))
ax

In [None]:
fig = ax.get_figure()
fig.savefig(os.path.join(USER_12949_DIR, 'usr12949_mv_rate_amount.png'), bbox_inches = "tight")

##### Genres rated

In [None]:
user_12949_rated_genres = (user_12949_rated_movies
                          .pipe(prepare_user_rated_genres))

In [None]:
user_12949_rated_genres.head()

In [None]:
ax = plot_counts(user_12949_rated_genres, count_col='genre', y_label='rate count',
                 title='Rate count by genre for user id {}'.format(12949), figsize=(14, 7), rotate=True)
ax

In [None]:
fig = ax.get_figure()
fig.savefig(os.path.join(USER_12949_DIR, 'usr12949_genre_rate_count.png'), bbox_inches = "tight")

In [None]:
user_12949_genres_rates_summary = (user_12949_rated_genres
                                 .pipe(prepare_summary_table, group_cols=['userId', 'genre'], 
                                       aggr_col='rating', col_1='rate_amount', col_2='rate_avg')
                                 .pipe(sort_values, sort_subset=['rate_avg'], ascending=False))

In [None]:
user_12949_genres_rates_summary.head()

In [None]:
user_12949_genres_rates_summary.info()
user_12949_genres_rates_summary.describe()

In [None]:
ax = plot_bar(user_12949_genres_rates_summary, x='genre', y='rate_avg', 
              title='Ratings average by genre for user id {}'.format(12949), ann_format="{:.2f}", 
              figsize=(14, 7), rotate=True, palette="Greens_d")
ax.set_ylim([0.0, 5.0])
ax

In [None]:
fig = ax.get_figure()
fig.savefig(os.path.join(USER_12949_DIR, 'usr12949_genre_rate_avg.png'), bbox_inches = "tight")

##### Keywords rated

In [None]:
user_12949_rated_keywords = (user_12949_rated_movies
                           .pipe(prepare_user_rated_keywords))

In [None]:
user_12949_rated_keywords.head()

In [None]:
user_12949_rated_keywords.info()

In [None]:
user_12949_keywords_rates_summary = (user_12949_rated_keywords
                                     .pipe(prepare_summary_table, group_cols=['userId', 'plot_keyword'], 
                                       aggr_col='rating', col_1='rate_amount', col_2='rate_avg')
                                     .pipe(sort_values, sort_subset=['rate_amount'], ascending=False))

In [None]:
user_12949_keywords_rates_summary.head()

In [None]:
user_12949_keywords_rates_summary.info()
user_12949_keywords_rates_summary.describe()

In [None]:
ax = plot_bar(user_12949_keywords_rates_summary.head(20), x='plot_keyword', y='rate_amount', 
              title='Most frequently rated keywords for user id {}'.format(12949), ann_format="{:.0f}",
              figsize=(14, 7), rotate=True, palette="Oranges_d")
ax

In [None]:
fig = ax.get_figure()
fig.savefig(os.path.join(USER_12949_DIR, 'usr12949_keywords_rate_amount.png'), bbox_inches = "tight")

### __User 12002 analysis - max rate avg__

In [None]:
USER_12002_DIR = os.path.join(USERS_ANALYSIS_DIR, 'user_12002')
Path(USER_12002_DIR).mkdir(parents=True, exist_ok=True)

#### __Prepare rated movies table__

In [None]:
user_12002_rated_movies = (ratings
                           .pipe(prepare_user_rated_movies_table, 12002)
                           .pipe(merge_user_rating_with_movies, movies))

In [None]:
user_12002_rated_movies.head(2)

In [None]:
user_12002_rated_movies.info()
user_12002_rated_movies.describe()

In [None]:
user_12002_rated_movies.to_csv(os.path.join(USER_12002_DIR, 'usr12002_rated_movies.csv'), index=False)

#### __Visualizations__

##### Movies rating count

In [None]:
ax = plot_counts(user_12002_rated_movies, count_col='rating', 
                 title='Movies ratings count for user id {}'.format(12002), figsize=(4,6))
ax = change_bars_width(ax, .35)
ax

In [None]:
fig = ax.get_figure()
fig.savefig(os.path.join(USER_12002_DIR, 'usr12002_mv_rate_amount.png'), bbox_inches = "tight")

##### Genres rated

In [None]:
user_12002_rated_genres = (user_12002_rated_movies
                          .pipe(prepare_user_rated_genres))

In [None]:
user_12002_rated_genres.head()

In [None]:
ax = plot_counts(user_12002_rated_genres, count_col='genre', y_label='rate count',
                 title='Rate count by genre for user id {}'.format(12002), figsize=(14, 7), rotate=True)
ax

In [None]:
fig = ax.get_figure()
fig.savefig(os.path.join(USER_12002_DIR, 'usr12002_genre_rate_count.png'), bbox_inches = "tight")

In [None]:
user_12002_genres_rates_summary = (user_12002_rated_genres
                                 .pipe(prepare_summary_table, group_cols=['userId', 'genre'], 
                                       aggr_col='rating', col_1='rate_amount', col_2='rate_avg')
                                 .pipe(sort_values, sort_subset=['rate_avg'], ascending=False))

In [None]:
user_12002_genres_rates_summary.head()

In [None]:
user_12002_genres_rates_summary.info()
user_12002_genres_rates_summary.describe()

In [None]:
ax = plot_bar(user_12002_genres_rates_summary, x='genre', y='rate_avg', 
              title='Ratings average by genre for user id {}'.format(12002), ann_format="{:.2f}", 
              figsize=(14, 7), rotate=True, palette="Greens_d")
ax.set_ylim([0.0, 5.5])
ax

In [None]:
fig = ax.get_figure()
fig.savefig(os.path.join(USER_12002_DIR, 'usr12002_genre_rate_avg.png'), bbox_inches = "tight")

##### Keywords rated

In [None]:
user_12002_rated_keywords = (user_12002_rated_movies
                            .pipe(prepare_user_rated_keywords))

In [None]:
user_12002_rated_keywords.head()

In [None]:
user_12002_rated_keywords.info()

In [None]:
user_12002_keywords_rates_summary = (user_12002_rated_keywords
                                     .pipe(prepare_summary_table, group_cols=['userId', 'plot_keyword'], 
                                       aggr_col='rating', col_1='rate_amount', col_2='rate_avg')
                                     .pipe(sort_values, sort_subset=['rate_amount'], ascending=False)
                                     .pipe(reset_index))

In [None]:
user_12002_keywords_rates_summary.head()

In [None]:
user_12002_keywords_rates_summary.info()
user_12002_keywords_rates_summary.describe()

In [None]:
ax = plot_bar(user_12002_keywords_rates_summary.head(20), x='plot_keyword', y='rate_amount', 
              title='Most frequently rated keywords for user id {}'.format(12002), ann_format="{:.0f}",
              figsize=(14, 7), rotate=True, palette="Oranges_d")
ax

In [None]:
fig = ax.get_figure()
fig.savefig(os.path.join(USER_12002_DIR, 'usr12002_keywords_rate_amount.png'), bbox_inches = "tight")

### __User 131800 analysis - min rate avg__

In [None]:
USER_131800_DIR = os.path.join(USERS_ANALYSIS_DIR, 'user_131800')
Path(USER_131800_DIR).mkdir(parents=True, exist_ok=True)

#### __Prepare rated movies table__

In [None]:
user_131800_rated_movies = (ratings
                           .pipe(prepare_user_rated_movies_table, 131800)
                           .pipe(merge_user_rating_with_movies, movies))

In [None]:
user_131800_rated_movies.head(2)

In [None]:
user_131800_rated_movies.info()
user_131800_rated_movies.describe()

In [None]:
user_131800_rated_movies.to_csv(os.path.join(USER_131800_DIR, 'usr131800_rated_movies.csv'), index=False)

#### __Visualizations__

##### Movies rating count

In [None]:
# Count plot of the ratings given by user 131800.
# The title must carry this user's id: the previous value (12002) was copied
# from the preceding section and mislabelled the figure saved as
# 'usr131800_mv_rate_amount.png'.
ax = plot_counts(user_131800_rated_movies, count_col='rating', 
                 title='Movies ratings count for user id {}'.format(131800), figsize=(4,6))
# Adjust the bar width via the project helper, which returns the axes to plot.
ax = change_bars_width(ax, 0.65)
ax

In [None]:
fig = ax.get_figure()
fig.savefig(os.path.join(USER_131800_DIR, 'usr131800_mv_rate_amount.png'), bbox_inches = "tight")

##### Genres rated

In [None]:
user_131800_rated_genres = (user_131800_rated_movies
                            .pipe(prepare_user_rated_genres))

In [None]:
user_131800_rated_genres.head()

In [None]:
ax = plot_counts(user_131800_rated_genres, count_col='genre', y_label='rate count',
                 title='Rate count by genre for user id {}'.format(131800), figsize=(14, 6), rotate=True)
ax

In [None]:
fig = ax.get_figure()
fig.savefig(os.path.join(USER_131800_DIR, 'usr131800_genre_rate_count.png'), bbox_inches = "tight")

In [None]:
user_131800_genres_rates_summary = (user_131800_rated_genres
                                    .pipe(prepare_summary_table, group_cols=['userId', 'genre'], 
                                       aggr_col='rating', col_1='rate_amount', col_2='rate_avg')
                                   .pipe(sort_values, sort_subset=['rate_avg'], ascending=False)
                                   .pipe(reset_index))

In [None]:
user_131800_genres_rates_summary.head()

In [None]:
user_131800_genres_rates_summary.info()
user_131800_genres_rates_summary.describe()

In [None]:
ax = plot_bar(user_131800_genres_rates_summary, x='genre', y='rate_avg', 
              title='Ratings average by genre for user id {}'.format(131800), ann_format="{:.2f}", 
              figsize=(14, 6), rotate=True, palette="Greens_d")
ax.set_ylim([0.0, 5.0])
ax

In [None]:
fig = ax.get_figure()
fig.savefig(os.path.join(USER_131800_DIR, 'usr131800_genre_rate_avg.png'), bbox_inches = "tight")

##### Rated keywords

In [None]:
user_131800_rated_keywords = (user_131800_rated_movies
                             .pipe(prepare_user_rated_keywords))

In [None]:
user_131800_rated_keywords.info()

In [None]:
user_131800_keywords_rates_summary = (user_131800_rated_keywords
                                     .pipe(prepare_summary_table, group_cols=['userId', 'plot_keyword'], 
                                       aggr_col='rating', col_1='rate_amount', col_2='rate_avg')
                                     .pipe(sort_values, sort_subset=['rate_amount'], ascending=False)
                                     .pipe(reset_index))

In [None]:
user_131800_keywords_rates_summary.head()

In [None]:
user_131800_keywords_rates_summary.info()
user_131800_keywords_rates_summary.describe()

In [None]:
ax = plot_bar(user_131800_keywords_rates_summary.head(20), x='plot_keyword', y='rate_amount', 
              title='Most frequently rated keywords for user id {}'.format(131800), ann_format="{:.0f}",
              figsize=(14, 7), rotate=True, palette="Oranges_d")
ax

In [None]:
fig = ax.get_figure()
fig.savefig(os.path.join(USER_131800_DIR, 'usr131800_keywords_rate_amount.png'), bbox_inches = "tight")