<a href="https://colab.research.google.com/github/NinoNinov/DS-Finance/blob/main/Main%20Operations%20in%20DataFrame.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# prompt: give me the main operations on a pandas DataFrame, with examples

import pandas as pd

# Walkthrough of common pandas DataFrame operations on a small sample frame.
# NOTE: `df` is modified as the script proceeds (row 0 is dropped in section 3
# and a column is added in section 7), so later sections operate on the
# reduced/extended frame rather than on the original five rows.

# Sample DataFrame
data = {'col1': [1, 2, 3, 4, 5],
        'col2': [6, 7, 8, 9, 10],
        'col3': ['A', 'B', 'C', 'D', 'E']}
df = pd.DataFrame(data)

# 1. Viewing Data
print("Head:\n", df.head(2))  # First 2 rows
print("\nTail:\n", df.tail(2))  # Last 2 rows
# info() writes its summary straight to stdout and returns None, so it must be
# called on its own; wrapping it in print() would emit the summary *before*
# the label and then print a stray "None".
print("\nInfo:")
df.info()  # Summary of DataFrame
print("\nDescribe:\n", df.describe())  # Descriptive statistics

# 2. Selecting Data
print("\nSelecting a column:\n", df['col1'])
print("\nSelecting multiple columns:\n", df[['col1', 'col3']])
# .loc slices by label and includes both endpoints.
print("\nSelecting rows by index:\n", df.loc[0:2])  # Rows 0, 1, and 2
print("\nSelecting rows by condition:\n", df[df['col1'] > 2])
print("\nSelecting specific cell:\n", df.loc[0, 'col2'])  # Row 0, Column 'col2'

# 3. Adding/Removing Data
df['col4'] = df['col1'] + df['col2']  # Adding a new column
print("\nDataFrame with new column:\n", df)
df = df.drop('col4', axis=1)  # Removing a column
print("\nDataFrame after dropping column:\n", df)
# From here on `df` no longer contains the row labelled 0.
df = df.drop(0, axis=0)  # Removing a row
print("\nDataFrame after dropping row:\n", df)

# 4. Sorting Data
df_sorted = df.sort_values(by='col2', ascending=False)  # Sorting
print("\nSorted DataFrame:\n", df_sorted)

# 5. Grouping Data
# Every 'col3' value is unique in this sample, so each group holds one row and
# the per-group sum equals that row's 'col1' value.
grouped = df.groupby('col3')['col1'].sum()  # Grouping by 'col3' and summing 'col1'
print("\nGrouped DataFrame:\n", grouped)

# 6. Handling Missing Data
# Create a DataFrame with missing values
data_missing = {'col1': [1, 2, None, 4, 5],
                'col2': [6, None, 8, 9, 10]}
df_missing = pd.DataFrame(data_missing)
print("\nDataFrame with missing values:\n", df_missing)

df_filled = df_missing.fillna(0)  # Filling missing values with 0
print("\nDataFrame with filled missing values:\n", df_filled)

# 7. Applying Functions
# Vectorized arithmetic gives the same values as apply(lambda x: x**2) without
# calling a Python function once per element.
df['col1_squared'] = df['col1'] ** 2
print("\nDataFrame with applied function:\n", df)

# 8. Merging/Joining DataFrames
# Create another DataFrame for merging
data2 = {'col1': [3, 4, 5], 'col5': [11, 12, 13]}
df2 = pd.DataFrame(data2)
# Inner join keeps only the 'col1' values present in both frames.
merged_df = pd.merge(df, df2, on='col1', how='inner')  # Merging DataFrames
print("\nMerged DataFrame:\n", merged_df)

# 9. Data Aggregation
print("\nSum of 'col1':\n", df['col1'].sum())  # Calculate sum
print("\nMean of 'col2':\n", df['col2'].mean())  # Calculate mean
print("\nMax of 'col1':\n", df['col1'].max())  # Calculate max


# 10. Pivot Tables
pivot_table = pd.pivot_table(df, values='col2', index='col3', aggfunc='sum')
print("\nPivot table:\n", pivot_table)