In [1]:
import pandas as pd


In [4]:
data = [1,2,3,4,5]
series = pd.Series(data)

print(series)

0    1
1    2
2    3
3    4
4    5
dtype: int64


In [6]:
data = {
    'name': ["Alica", "Bob", "Charlie"],
    'age': [24,27,22],
    'city': ["Stockholm", "Göteborg","Malmö"]      

}
df = pd.DataFrame(data)
print(df)

      name  age       city
0    Alica   24  Stockholm
1      Bob   27   Göteborg
2  Charlie   22      Malmö


In [8]:
print(df.head())

      name  age       city
0    Alica   24  Stockholm
1      Bob   27   Göteborg
2  Charlie   22      Malmö


In [9]:
print(df.tail())

      name  age       city
0    Alica   24  Stockholm
1      Bob   27   Göteborg
2  Charlie   22      Malmö


In [10]:
df["name"]

0      Alica
1        Bob
2    Charlie
Name: name, dtype: object

In [18]:
print(df.describe())

             age
count   3.000000
mean   24.333333
std     2.516611
min    22.000000
25%    23.000000
50%    24.000000
75%    25.500000
max    27.000000


In [19]:
df['age'].fillna(30, inplace= True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['age'].fillna(30, inplace= True)


In [20]:
df.dropna(inplace= True)

In [35]:
print(df.iloc[1,1])

27


In [36]:
filtered_df = df[df['age']> 25 ]
print(filtered_df)

  name  age      city
1  Bob   27  Göteborg


In [40]:
filtered_df = df[(df['age'] > 25) & (df['city'] == "Göteborg")]
print(filtered_df)

  name  age      city
1  Bob   27  Göteborg


In [42]:
grouped = df.groupby('city').size()
print(grouped)

city
Göteborg     1
Malmö        1
Stockholm    1
dtype: int64


In [43]:
grouped = df.groupby('city')['age'].mean()
print(grouped)

city
Göteborg     27.0
Malmö        22.0
Stockholm    24.0
Name: age, dtype: float64


In [46]:
df1 = pd.DataFrame(
    {
        'ID': [1,2,3],
        'Name': ["Alice", "Bob", "Charlie"]
    }
)

df2 = pd.DataFrame(
    {
        'ID': [1,2,3],
        "Age": [24,27,22]
    }
)

merged_df = pd.merge(df1,df2, on= "ID")
print(merged_df)

   ID     Name  Age
0   1    Alice   24
1   2      Bob   27
2   3  Charlie   22


In [50]:
df3 = pd.concat([df1,df2], ignore_index= True)
# print(df3)

df4 = pd.concat([df1,df2,], axis= 1)
print(df4)

   ID     Name  ID  Age
0   1    Alice   1   24
1   2      Bob   2   27
2   3  Charlie   3   22


In [57]:
df['sale_date'] = ['2025-01-01', '2025-02-01', '2025-03-01']

df['sale_date'] = pd.to_datetime(df['sale_date'])
print(df['sale_date'])

0   2025-01-01
1   2025-02-01
2   2025-03-01
Name: sale_date, dtype: datetime64[ns]


In [58]:
print(df)

      name  age       city  sale_date
0    Alica   24  Stockholm 2025-01-01
1      Bob   27   Göteborg 2025-02-01
2  Charlie   22      Malmö 2025-03-01


In [59]:
df['year'] = df['sale_date'].dt.year
df['month'] = df['sale_date'].dt.month
df['day'] = df['sale_date'].dt.day
print(df[['sale_date','year','month','day']])

   sale_date  year  month  day
0 2025-01-01  2025      1    1
1 2025-02-01  2025      2    1
2 2025-03-01  2025      3    1


In [60]:
df.to_csv('data.csv', index= False)

In [62]:
df_from_csv = pd.read_csv('data/data.csv')
print(df_from_csv)

      name  age       city   sale_date  year  month  day
0    Alica   24  Stockholm  2025-01-01  2025      1    1
1      Bob   27   Göteborg  2025-02-01  2025      2    1
2  Charlie   22      Malmö  2025-03-01  2025      3    1


In [66]:
df = dict(Name = ["Niclas","Anna","Jens"],
          Age = [24,27,22],
          City = ["Stockholm", "Göteborg", "Malmö"] 
          )
df = pd.DataFrame(df)
df

Unnamed: 0,Name,Age,City
0,Niclas,24,Stockholm
1,Anna,27,Göteborg
2,Jens,22,Malmö


In [67]:
df.head(2)

Unnamed: 0,Name,Age,City
0,Niclas,24,Stockholm
1,Anna,27,Göteborg


In [85]:
filtered_df = df[df['Age'] > 23]
name_over_23 = filtered_df['Name']

print(name_over_23)

0    Niclas
1      Anna
Name: Name, dtype: object


In [86]:
df['Country'] = 'Sweden'

In [87]:
df

Unnamed: 0,Name,Age,City,Country
0,Niclas,24,Stockholm,Sweden
1,Anna,27,Göteborg,Sweden
2,Jens,22,Malmö,Sweden


In [92]:
new_df = dict(City= ["Stockholm", "Göteborg","Malmö","Stockholm"],
              Population= [975551,583056,347949,975551]
              )

new_df = pd.DataFrame(new_df)
new_df

Unnamed: 0,City,Population
0,Stockholm,975551
1,Göteborg,583056
2,Malmö,347949
3,Stockholm,975551


In [101]:
filtered_df = new_df.groupby("City").size().sort_values(ascending= False)
filtered_df

City
Stockholm    2
Göteborg     1
Malmö        1
dtype: int64