In [21]:
import numpy as np
import pandas as pd


In [22]:
# Creating a DataFrame from a list of lists
data = [[1, 'Alice', 25], [2, 'Bob', 30], [3, 'Charlie', 28]]
df = pd.DataFrame(data, columns=['ID', 'Name', 'Age'])


# Creating a DataFrame from a dictionary
data_dict = {
    'ID': [1, 2, 3,4],
    'Name': ['Alice', 'Bob', 'Charlie',"ad"],
    'Age': [25, 30, 28, 26]
}
df_dict = pd.DataFrame(data_dict)


# Creating a DataFrame from a NumPy array
np_data = np.array([[1, 'Alice', 25], [2, 'Bob', 30], [3, 'Charlie', 28]])
df_np = pd.DataFrame(np_data, columns=['ID', 'Name', 'Age'])



In [23]:
df_np

Unnamed: 0,ID,Name,Age
0,1,Alice,25
1,2,Bob,30
2,3,Charlie,28


In [25]:
# Basic information about the DataFrame

# print(df.info())

# # Summary statistics
print(df.describe())

        ID        Age
count  3.0   3.000000
mean   2.0  27.666667
std    1.0   2.516611
min    1.0  25.000000
25%    1.5  26.500000
50%    2.0  28.000000
75%    2.5  29.000000
max    3.0  30.000000


In [27]:
print("\nFirst 3 Rows:")
print(df.head(1))

# Displaying the last few rows
print("\nLast 3 Rows:")
print(df.tail(1))

# Displaying specific columns
print("\nSpecific Columns (Name and Age):")
print(df[['Name', 'Age']])


First 3 Rows:
   ID   Name  Age
0   1  Alice   25

Last 3 Rows:
   ID     Name  Age
2   3  Charlie   28

Specific Columns (Name and Age):
      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   28


In [28]:
# Selecting a single column
print("\nSingle Column (Name):")
print(df['Name'])

# Selecting a subset of rows and columns
print("\nSubset of Data:")
print(df.loc[1:2, ['Name', 'Age']])  # Rows 1 and 2, columns 'Name' and 'Age'



Single Column (Name):
0      Alice
1        Bob
2    Charlie
Name: Name, dtype: object

Subset of Data:
      Name  Age
1      Bob   30
2  Charlie   28


In [10]:
# Filtering data based on a condition
filtered_df = df[df['Age'] > 25]
print("\nFiltered Data (Age > 25):")
print(filtered_df)



Filtered Data (Age > 25):
   ID     Name  Age
1   2      Bob   30
2   3  Charlie   28


In [15]:
# Adding a new column
df['Salary'] = [50000, 60000, 55000]
print("\nDataFrame with New Column (Salary):")
print(df)

# Removing a column
# df.drop('Salary', axis=1, inplace=True)  # axis=1 for columns, inplace=True to modify the original DataFrame
print("\nDataFrame with Column Removed (Salary):")
print(df)



DataFrame with New Column (Salary):
   ID     Name  Age  Salary
0   1    Alice   25   50000
1   2      Bob   30   60000
2   3  Charlie   28   55000

DataFrame with Column Removed (Salary):
   ID     Name  Age  Salary
0   1    Alice   25   50000
1   2      Bob   30   60000
2   3  Charlie   28   55000


In [16]:
# Sorting by a column
sorted_df = df.sort_values(by='Age', ascending=False)
print("\nSorted DataFrame (by Age):")
print(sorted_df)


Sorted DataFrame (by Age):
   ID     Name  Age  Salary
1   2      Bob   30   60000
2   3  Charlie   28   55000
0   1    Alice   25   50000


In [17]:
# Grouping data by a column
grouped_df = df.groupby('Age').mean()
print("\nGrouped DataFrame (by Age with Mean):")
print(grouped_df)


Grouped DataFrame (by Age with Mean):
      ID   Salary
Age              
25   1.0  50000.0
28   3.0  55000.0
30   2.0  60000.0


  grouped_df = df.groupby('Age').mean()


In [18]:
# Applying aggregation functions
agg_df = df.groupby('Age').agg({'ID': 'count', 'Salary': 'mean'})
print("\nAggregated DataFrame (Count of IDs and Mean Salary):")
print(agg_df)


Aggregated DataFrame (Count of IDs and Mean Salary):
     ID   Salary
Age             
25    1  50000.0
28    1  55000.0
30    1  60000.0


In [19]:
# Saving DataFrame to a CSV file
df.to_csv('output.csv', index=False)
print("\nDataFrame saved to 'output.csv'")


DataFrame saved to 'output.csv'


In [20]:
loaded_df = pd.read_csv('output.csv')
print("\nLoaded DataFrame from 'output.csv':")
print(loaded_df)


Loaded DataFrame from 'output.csv':
   ID     Name  Age  Salary
0   1    Alice   25   50000
1   2      Bob   30   60000
2   3  Charlie   28   55000
