# Pandas

#### Pandas is an open-source Python library used for data manipulation and analysis. It provides easy-to-use data structures, chiefly DataFrame and Series (older releases also offered Panel, which has since been removed), that allow efficient handling of structured data. Pandas is built on top of the NumPy library and provides additional functionality for data cleaning, transformation, exploration, and visualization.

In [1]:
import pandas as pd
import numpy as np

#### Create a DataFrame from a dictionary

In [2]:
# Build the frame from a dict: each key becomes a column label and each
# list supplies that column's values.
records_by_column = {
    'column1': [1, 2, 3],
    'column2': ['a', 'b', 'c'],
}
df = pd.DataFrame(records_by_column)
df

Unnamed: 0,column1,column2
0,1,a
1,2,b
2,3,c


#### Create a DataFrame from a list of lists

In [3]:
# Each inner list is one row; the column labels are supplied separately.
rows = [[1, 'a'], [2, 'b'], [3, 'c']]
labels = ['column1', 'column2']
df1 = pd.DataFrame(data=rows, columns=labels)
df1

Unnamed: 0,column1,column2
0,1,a
1,2,b
2,3,c


#### Create a DataFrame from a NumPy array

In [4]:
# Wrap a 2x3 integer array. No labels are given, so the rows and columns
# are numbered from 0, as the output below shows.
array_2d = np.array([[1, 2, 3], [4, 5, 6]])
df2 = pd.DataFrame(array_2d)
df2

Unnamed: 0,0,1,2
0,1,2,3
1,4,5,6


#### Access the index of the DataFrame

In [5]:
# Row labels of df; the output shows a RangeIndex covering rows 0 through 2.
index = df.index
print(index)

RangeIndex(start=0, stop=3, step=1)


#### Access the values of the DataFrame

In [6]:
# Export the frame's contents as a NumPy array. to_numpy() is the accessor
# the pandas documentation recommends over the older .values attribute.
# The columns mix integers and strings, so the printed array shows both.
values = df.to_numpy()
print(values)

[[1 'a']
 [2 'b']
 [3 'c']]


#### Check the data types of the columns

In [7]:
# One dtype per column: the output shows int64 for column1 and object
# (used here for the strings) for column2.
dtypes = df.dtypes
print(dtypes)

column1     int64
column2    object
dtype: object


#### Check the shape of the DataFrame

In [8]:
# (rows, columns) tuple; here 3 rows by 2 columns.
shape = df.shape
print(shape)

(3, 2)


#### Check the column names of the DataFrame

In [9]:
# Column labels, returned as an Index object rather than a plain list.
columns = df.columns
print(columns)

Index(['column1', 'column2'], dtype='object')


#### Access a specific column

In [12]:
# Indexing with a single label returns that column as a Series
# (note the Name/dtype footer in the output).
column_data = df['column1']
print(column_data)

0    1
1    2
2    3
Name: column1, dtype: int64


#### Access multiple columns

In [13]:
# Indexing with a list of labels (hence the double brackets) returns a
# DataFrame containing just those columns.
columns_data = df[['column1', 'column2']]
print(columns_data)

   column1 column2
0        1       a
1        2       b
2        3       c


#### Filter rows based on a condition

In [15]:
# Boolean-mask filtering keeps only the rows where the condition is True.
# No column1 value exceeds 10 (the largest is 3), so the result is empty.
filtered_df = df[df['column1'] > 10]
print(filtered_df)

Empty DataFrame
Columns: [column1, column2]
Index: []


#### Sort the DataFrame by a column

In [16]:
# sort_values() returns a sorted copy, bound here to sorted_df.
# column1 is already in ascending order, so the printed rows are unchanged.
sorted_df = df.sort_values('column1')
print(sorted_df)

   column1 column2
0        1       a
1        2       b
2        3       c


#### Group the DataFrame by a column and calculate the mean

In [19]:
# Group rows by column1, then average column1 within each group.
# Every column1 value is unique here, so each group's mean is simply its
# own key, reported as a float.
grouped_df = df.groupby('column1')['column1'].mean()
print(grouped_df)

column1
1    1.0
2    2.0
3    3.0
Name: column1, dtype: float64


#### Drop a column

In [21]:
# axis=1 targets columns rather than rows. drop() returns a new frame, so
# the result is assigned back to df; from here on df holds only column2.
# NOTE: re-running an earlier cell that reads df['column1'] after this one
# will raise a KeyError.
df = df.drop('column1', axis=1)
df

Unnamed: 0,column2
0,a
1,b
2,c


#### Drop rows with missing values

In [22]:
# dropna() removes rows that contain missing values. df has none, so the
# displayed frame is unchanged.
df = df.dropna()
df

Unnamed: 0,column2
0,a
1,b
2,c


In [23]:
# Display df again: only column2 remains after the earlier drop.
df

Unnamed: 0,column2
0,a
1,b
2,c


In [24]:
# Display df1, which still has both of its original columns; the drop was
# applied to df only.
df1

Unnamed: 0,column1,column2
0,1,a
1,2,b
2,3,c


#### Rename columns

In [26]:
# rename() takes an {old_label: new_label} mapping and returns a new frame.
# column2 is not in the mapping, so it keeps its name.
df1 = df1.rename(columns={'column1': 'new_column'})
df1

Unnamed: 0,new_column,column2
0,1,a
1,2,b
2,3,c


#### Concatenate DataFrames vertically

In [27]:
# With the default axis=0 the frames are stacked row-wise. df1 and df2
# share no column labels, so the result carries the union of their columns,
# with NaN wherever a frame lacks a column (which is why the integers are
# displayed as floats). Row labels are kept as-is, so 0 and 1 appear twice.
concatenated_df = pd.concat([df1, df2])
concatenated_df

Unnamed: 0,new_column,column2,0,1,2
0,1.0,a,,,
1,2.0,b,,,
2,3.0,c,,,
0,,,1.0,2.0,3.0
1,,,4.0,5.0,6.0


#### Concatenate DataFrames horizontally

In [28]:
# axis=1 places the frames side by side, aligning rows by index label.
# df2 has no row labelled 2, so its columns are NaN in that row.
# This rebinds the concatenated_df name used by the vertical example.
concatenated_df = pd.concat([df1, df2], axis=1)
concatenated_df

Unnamed: 0,new_column,column2,0,1,2
0,1,a,1.0,2.0,3.0
1,2,b,4.0,5.0,6.0
2,3,c,,,


#### Calculate the sum of a column

In [33]:
# column2 holds strings, so sum() concatenates them ('a' + 'b' + 'c')
# instead of adding numbers; the result is the string 'abc'.
# Summing the numeric column (df1['new_column']) would give a number.
column_sum = df1['column2'].sum()
column_sum

'abc'

#### Calculate the mean of a column

In [34]:
# Arithmetic mean of the numeric column: (1 + 2 + 3) / 3 = 2.0.
column_mean = df1['new_column'].mean()
column_mean

2.0

#### Calculate the median of a column

In [35]:
# Middle value of the column's sorted values [1, 2, 3], i.e. 2.0.
column_median = df1['new_column'].median()
column_median

2.0

#### Calculate the minimum value in a column

In [36]:
# Smallest value in new_column.
column_min = df1['new_column'].min()
column_min

1

#### Calculate the maximum value in a column

In [37]:
# Largest value in new_column.
column_max = df1['new_column'].max()
column_max

3