In [1]:
import pandas as pd

In [2]:
# Load the two raw IMDb CSV exports from the current working directory.
# low_memory=False makes pandas parse the movies file in one pass, so each
# column's dtype is inferred from the whole column instead of chunk by chunk.
df_movies = pd.read_csv(filepath_or_buffer='IMDb movies.csv', low_memory=False)
df_ratings = pd.read_csv(filepath_or_buffer='IMDb ratings.csv')

In [3]:
# Keep only the columns used in the rest of the notebook.
# 'imdb_title_id' is retained in both frames because it is the shared key.
movie_columns = ['imdb_title_id', 'title', 'year',
                 'genre', 'country', 'director', 'actors']
rating_columns = ['imdb_title_id', 'total_votes', 'mean_vote']

df_movies = df_movies[movie_columns]
df_ratings = df_ratings[rating_columns]

## concat()

### Concatenate vertically

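To concatenate vertically (along the rows), the two DataFrames should share the same columns. Any column that exists in only one of them is kept and filled with `NaN` for the rows coming from the other.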

In [4]:
# Two toy frames with identical columns ('id', 'age'), built row by row,
# to demonstrate stacking along the rows.
df1 = pd.DataFrame.from_records(
    [('A', 30), ('B', 23), ('C', 25), ('D', 22)],
    columns=['id', 'age'],
)
df2 = pd.DataFrame.from_records(
    [('E', 40), ('F', 21), ('G', 19), ('F', 24)],
    columns=['id', 'age'],
)

In [5]:
# Stack df2 below df1. Each frame keeps its own row labels, so the
# labels 0-3 appear twice in the result.
pd.concat([df1, df2], axis='index')

Unnamed: 0,id,age
0,A,30
1,B,23
2,C,25
3,D,22
0,E,40
1,F,21
2,G,19
3,F,24


In [6]:
# Extract a 50% random sample of the original dataframe.
# random_state pins the draw so that re-running the notebook on a fresh kernel
# selects the same rows (without it every run produces a different df_half).
df_half = df_movies.sample(frac=0.5, random_state=42)
print(df_movies.shape)
df_half.shape

(85855, 7)


(42928, 7)

In [7]:
# Append the sampled rows (df_half) below the full df_movies frame.
# pd.concat stacks along the rows (axis=0) by default.
df_vertical_concat = pd.concat(objs=[df_movies, df_half])
df_vertical_concat.head(n=3)

Unnamed: 0,imdb_title_id,title,year,genre,country,director,actors
0,tt0000009,Miss Jerry,1894,Romance,USA,Alexander Black,"Blanche Bayliss, William Courtenay, Chauncey D..."
1,tt0000574,The Story of the Kelly Gang,1906,"Biography, Crime, Drama",Australia,Charles Tait,"Elizabeth Tait, John Tait, Norman Campbell, Be..."
2,tt0001892,Den sorte drøm,1911,Drama,"Germany, Denmark",Urban Gad,"Asta Nielsen, Valdemar Psilander, Gunnar Helse..."


In [8]:
# A vertical concat must contain every row of both inputs: check the invariant
# explicitly instead of relying on reading the printed shape.
assert len(df_vertical_concat) == len(df_movies) + len(df_half)
df_vertical_concat.shape

(128783, 7)

### Concatenate horizontally

To concatenate horizontally (along the columns), the two DataFrames should share a common index, because `concat` aligns rows by their index labels rather than by position.

In [9]:
# Two toy frames with the same default 0-3 index but different columns,
# to demonstrate placing columns side by side.
df1 = pd.DataFrame(data=dict(id=['A', 'B', 'C', 'D'],
                             age=[30, 23, 25, 22]))
df2 = pd.DataFrame(
    data=['Doctor', 'Statistician', 'Accountant', 'Developer'],
    columns=['job'],
)

In [10]:
# Place df2's columns to the right of df1's; rows are matched on the index.
pd.concat([df1, df2], axis='columns')

Unnamed: 0,id,age,job
0,A,30,Doctor
1,B,23,Statistician
2,C,25,Accountant
3,D,22,Developer


In [11]:
# Show (rows, columns) for each frame we are about to join side by side.
for frame in (df_movies, df_ratings):
    print(frame.shape)

(85855, 7)
(85855, 3)


In [25]:
# List the columns of both frames to find one they share;
# that column can then serve as the index to align on.
print(df_movies.columns, df_ratings.columns, sep='\n')

Index(['imdb_title_id', 'title', 'year', 'genre', 'country', 'director',
       'actors'],
      dtype='object')
Index(['imdb_title_id', 'total_votes', 'mean_vote'], dtype='object')


In [26]:
# Make the shared key 'imdb_title_id' the index of each frame so that
# pd.concat can line the rows up by title id, then join the columns (axis=1).
movies_by_id = df_movies.set_index('imdb_title_id')
ratings_by_id = df_ratings.set_index('imdb_title_id')
df_horizontal_concat = pd.concat([movies_by_id, ratings_by_id], axis=1)
df_horizontal_concat

Unnamed: 0_level_0,title,year,genre,country,director,actors,total_votes,mean_vote
imdb_title_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
tt0000009,Miss Jerry,1894,Romance,USA,Alexander Black,"Blanche Bayliss, William Courtenay, Chauncey D...",154,5.9
tt0000574,The Story of the Kelly Gang,1906,"Biography, Crime, Drama",Australia,Charles Tait,"Elizabeth Tait, John Tait, Norman Campbell, Be...",589,6.3
tt0001892,Den sorte drøm,1911,Drama,"Germany, Denmark",Urban Gad,"Asta Nielsen, Valdemar Psilander, Gunnar Helse...",188,6.0
tt0002101,Cleopatra,1912,"Drama, History",USA,Charles L. Gaskill,"Helen Gardner, Pearl Sindelar, Miss Fielding, ...",446,5.3
tt0002130,L'Inferno,1911,"Adventure, Drama, Fantasy",Italy,"Francesco Bertolini, Adolfo Padovan","Salvatore Papa, Arturo Pirovano, Giuseppe de L...",2237,6.9
...,...,...,...,...,...,...,...,...
tt9908390,Le lion,2020,Comedy,"France, Belgium",Ludovic Colbeau-Justin,"Dany Boon, Philippe Katerine, Anne Serra, Samu...",398,5.5
tt9911196,De Beentjes van Sint-Hildegard,2020,"Comedy, Drama",Netherlands,Johan Nijenhuis,"Herman Finkers, Johanna ter Steege, Leonie ter...",724,7.9
tt9911774,Padmavyuhathile Abhimanyu,2019,Drama,India,Vineesh Aaradya,"Anoop Chandran, Indrans, Sona Nair, Simon Brit...",265,7.8
tt9914286,Sokagin Çocuklari,2019,"Drama, Family",Turkey,Ahmet Faik Akinci,"Ahmet Faik Akinci, Belma Mamati, Metin Keçeci,...",194,9.4
