# Concatenate and transform data

In [1]:
import pandas as pd
import numpy as np

from pandas import Series, DataFrame

# Sample people table. Each dict key becomes a column and each list position
# becomes a row. Note that the ages are stored as strings, not integers.
user_columns = {
    'first name': ['Serban', 'Alex', 'Cristina', 'Florin'],
    'last name': ['Blebea', 'Popa', 'Aliman', 'Aliman'],
    'age': ['28', '33', '32', '38'],
    'gender': ['male', 'male', 'female', 'male'],
}
df_users = DataFrame(user_columns)

# Extra per-person details; rows line up with df_users by position (index 0-3).
detail_columns = {
    'job': ['developer', 'freelancer', 'media buyer', 'selles manager'],
    'hobby': ['coding', 'traveling', 'batminton', 'eat'],
}
df_details = DataFrame(detail_columns)

# Show both source tables before any transformation is applied.
for frame in (df_users, df_details):
    print(frame)

  first name last name age  gender
0     Serban    Blebea  28    male
1       Alex      Popa  33    male
2   Cristina    Aliman  32  female
3     Florin    Aliman  38    male
              job      hobby
0       developer     coding
1      freelancer  traveling
2     media buyer  batminton
3  selles manager        eat


### Concatenate two DataFrames column-wise

In [2]:
# Place the two tables side by side: concatenating along the column axis
# aligns rows on their index labels and appends the detail columns on the right.
frames_to_combine = [df_users, df_details]
new_concat_df = pd.concat(frames_to_combine, axis='columns')
print(new_concat_df)

  first name last name age  gender             job      hobby
0     Serban    Blebea  28    male       developer     coding
1       Alex      Popa  33    male      freelancer  traveling
2   Cristina    Aliman  32  female     media buyer  batminton
3     Florin    Aliman  38    male  selles manager        eat


### Drop columns from DataFrame

In [3]:
# drop() returns a new DataFrame without the listed columns;
# df_users itself is left unchanged.
columns_to_drop = ['first name']
no_first_name_df = df_users.drop(columns=columns_to_drop)
print(no_first_name_df)

  last name age  gender
0    Blebea  28    male
1      Popa  33    male
2    Aliman  32  female
3    Aliman  38    male


### Add data to DataFrame

In [4]:
# Six consecutive integers (0-5) as a Series. The name 'range' is what the
# Series will be called when it is used as a column elsewhere.
series = Series(np.arange(6), name='range')
print(series)

0    0
1    1
2    2
3    3
4    4
5    5
Name: range, dtype: int64


In [5]:
# Attach the named Series to df_users as a new column called 'range'.
# join() matches on index labels and keeps only the rows of the left frame,
# so the Series entries labelled 4 and 5 have no partner and are not included.
joined_df = df_users.join(series)
print(joined_df)

  first name last name age  gender  range
0     Serban    Blebea  28    male      0
1       Alex      Popa  33    male      1
2   Cristina    Aliman  32  female      2
3     Florin    Aliman  38    male      3


In [6]:
# Stack the Series on top of itself (12 values in total).
# Series.append was deprecated in pandas 1.4 and removed in pandas 2.0;
# pd.concat is the supported replacement and produces the same result.
# ignore_index=False keeps the original labels, so 0-5 appear twice in the index.
# NOTE: despite its name, joined_df holds a Series here, not a DataFrame.
joined_df = pd.concat([series, series], ignore_index=False)
print(joined_df)

0    0
1    1
2    2
3    3
4    4
5    5
0    0
1    1
2    2
3    3
4    4
5    5
Name: range, dtype: int64


### Sort data in DataFrame by column

In [7]:
# Order the rows alphabetically (A -> Z) by last name. sort_values() returns
# a new DataFrame whose rows keep their original index labels.
sort_column = 'last name'
sorted_df = df_users.sort_values(by=sort_column)
print(sorted_df)

  first name last name age  gender
2   Cristina    Aliman  32  female
3     Florin    Aliman  38    male
0     Serban    Blebea  28    male
1       Alex      Popa  33    male


### Group data in DataFrame by column

In [8]:
# Group the rows by the values of the 'gender' column.
# groupby() is lazy: it returns a DataFrameGroupBy object, so printing it shows
# only the object's repr rather than the grouped rows.
gender_key = df_users['gender']
grouped_df = df_users.groupby(gender_key)
print(grouped_df)

<pandas.core.groupby.groupby.DataFrameGroupBy object at 0x10abcfc18>
