In [None]:
# This line imports the Pandas library and assigns it the alias 'pd'. This alias is a common convention in the data science community.
# Here, a Python dictionary data is defined,
# where keys represent column names ('Name', 'Age', 'City'), and values are lists containing data for each column.
# The pd.DataFrame(data) creates a Pandas DataFrame from this dictionary.

In [None]:
# dataframe

In [1]:
import pandas as pd

# Creating a DataFrame
data = {'Name': ['Alice', 'Bob', 'Charlie'],
        'Age': [25, 30, 35],
        'City': ['New York', 'San Francisco', 'Los Angeles']}

df = pd.DataFrame(data)

# Displaying the DataFrame
print(df)


      Name  Age           City
0    Alice   25       New York
1      Bob   30  San Francisco
2  Charlie   35    Los Angeles


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [None]:
# Reading CSV data using using pandas

In [2]:
import pandas as pd

# Specify the path to your CSV file
csv_file_path = 'details.csv'

# Read CSV file into a DataFrame
df = pd.read_csv(csv_file_path)

# Display the DataFrame
print(df)


      Name  Age           City
0    Alice   25       New York
1      Bob   30  San Francisco
2  Charlie   35    Los Angeles


In [None]:
#

In [3]:
import pandas as pd

# Specify the path to your CSV file
csv_file_path = 'grade.csv'

# Read CSV file into a DataFrame
df = pd.read_csv(csv_file_path)

# Display the DataFrame
print(df)


      Name  Class Grade
0   Sachin     12     A
1  Chaithu     12     A
2   Jeevan     12     A
3    Akash     12     A


In [None]:
# Filter Data in Pandas Dataframe using query

In [4]:
import pandas as pd

# Create a new sample DataFrame
data = {'Product': ['Laptop', 'Phone', 'Tablet', 'Desktop'],
        'Price': [1200, 800, 400, 1500],
        'Stock': [50, 100, 20, 30],
        'Brand': ['A', 'B', 'A', 'C']}

df = pd.DataFrame(data)

# Display the original DataFrame
print("Original DataFrame:")
print(df)
print()

# Use query to filter rows where Price is greater than 1000 and Stock is less than 50
result = df.query('Price > 1000 & Stock < 50')

print("Filtered DataFrame:")
print(result)


Original DataFrame:
   Product  Price  Stock Brand
0   Laptop   1200     50     A
1    Phone    800    100     B
2   Tablet    400     20     A
3  Desktop   1500     30     C

Filtered DataFrame:
   Product  Price  Stock Brand
3  Desktop   1500     30     C


In [None]:
# Get Count by Status using Pandas Dataframe APIs

In [5]:
import pandas as pd

# Sample DataFrame creation for illustration
data = {'id': [1, 2, 3, 4, 5],
        'status': ['Active', 'Inactive', 'Active', 'Inactive', 'Active']}
df = pd.DataFrame(data)

# Group by 'status' and count the occurrences
status_counts = df.groupby('status').size().reset_index(name='count')

# Print the result
print(status_counts)


     status  count
0    Active      3
1  Inactive      2


In [None]:
# Get count by Month and Status using Pandas Dataframe APIs

In [6]:
import pandas as pd

# Sample data
data = {
    'Date': ['2023-01-01', '2023-01-01', '2023-02-01', '2023-02-01', '2023-02-01'],
    'Status': ['Open', 'Closed', 'Open', 'Closed', 'Open']
}

# Create a DataFrame
df = pd.DataFrame(data)

# Convert 'Date' column to datetime
df['Date'] = pd.to_datetime(df['Date'])

# Extract Month and Year from 'Date' column
df['Month'] = df['Date'].dt.to_period('M')

# Use groupby and pivot_table to get count by Month and Status
result = df.groupby(['Month', 'Status']).size().reset_index(name='Count')

# Pivot the table to have Status as columns
result_pivot = result.pivot_table(index='Month', columns='Status', values='Count', fill_value=0)

# If you want to reset the index
result_pivot.reset_index(inplace=True)

print(result_pivot)


Status    Month  Closed  Open
0       2023-01     1.0   1.0
1       2023-02     1.0   2.0


In [None]:
# Create Dataframes using dynamic column list on CSV Data

In [9]:
import pandas as pd

# Load your CSV data into a DataFrame
csv_file_path = 'Sample.csv'
original_df = pd.read_csv(csv_file_path)

# Print original DataFrame columns
print("Original DataFrame Columns:", original_df.columns)

# Define a dynamic column list 
dynamic_column_list = ['column1', 'column2', 'column3']

# Create a new DataFrame with the selected columns
new_df = original_df[dynamic_column_list]

# Display the new DataFrame
print(new_df)


Original DataFrame Columns: Index(['column1', 'column2', 'column3', 'column4', 'column5'], dtype='object')
   column1  column2 column3
0        1       11       a
1        2       12       b
2        3       13       c
3        4       14       d
4        5       15       e


In [None]:
# Performing Inner Join between Pandas Dataframes

In [10]:
import pandas as pd

# Creating two sample DataFrames
df1 = pd.DataFrame({'ID': [1, 2, 3, 4],
                    'Name': ['Alice', 'Bob', 'Charlie', 'David']})

df2 = pd.DataFrame({'ID': [2, 3, 4, 5],
                    'Age': [25, 30, 35, 40]})

# Performing an inner join based on the 'ID' column
result_df = pd.merge(df1, df2, on='ID', how='inner')

print(result_df)

   ID     Name  Age
0   2      Bob   25
1   3  Charlie   30
2   4    David   35


In [None]:
# Perform Aggregations on Join results in pandas

In [11]:
import pandas as pd

# Sample DataFrames
df1 = pd.DataFrame({
    'key': ['A', 'B', 'C', 'A', 'B', 'C'],
    'value1': [1, 2, 3, 4, 5, 6]
})

df2 = pd.DataFrame({
    'key': ['A', 'B', 'C', 'A', 'B', 'C'],
    'value2': [10, 20, 30, 40, 50, 60]
})

# Merge DataFrames on the 'key' column
merged_df = pd.merge(df1, df2, on='key')

# Group by 'key' and perform aggregations on the joined data
agg_result = merged_df.groupby('key').agg({
    'value1': 'sum',
    'value2': 'mean'
}).reset_index()

print(agg_result)

  key  value1  value2
0   A      10    25.0
1   B      14    35.0
2   C      18    45.0


In [None]:
# Sort Data in Pandas Dataframes

In [12]:
import pandas as pd

# Create a sample DataFrame
data = {'Name': ['Alice', 'Bob', 'Charlie', 'David'],
        'Age': [25, 30, 22, 35],
        'Salary': [50000, 60000, 45000, 70000]}

df = pd.DataFrame(data)

# Sorting by a single column, for example, 'Age'
df_sorted_by_age = df.sort_values(by='Age')
print("Sorted by Age:\n", df_sorted_by_age)

# Sorting by multiple columns, for example, 'Age' and then 'Salary'
df_sorted_by_multiple_columns = df.sort_values(by=['Age', 'Salary'])
print("\nSorted by Age and Salary:\n", df_sorted_by_multiple_columns)

# Sorting in descending order by 'Age'
df_sorted_descending = df.sort_values(by='Age', ascending=False)
print("\nSorted by Age (descending):\n", df_sorted_descending)

# Sorting 'df' in-place by 'Age'
df.sort_values(by='Age', inplace=True)
print("\nDataFrame sorted in-place by Age:\n", df)

Sorted by Age:
       Name  Age  Salary
2  Charlie   22   45000
0    Alice   25   50000
1      Bob   30   60000
3    David   35   70000

Sorted by Age and Salary:
       Name  Age  Salary
2  Charlie   22   45000
0    Alice   25   50000
1      Bob   30   60000
3    David   35   70000

Sorted by Age (descending):
       Name  Age  Salary
3    David   35   70000
1      Bob   30   60000
0    Alice   25   50000
2  Charlie   22   45000

DataFrame sorted in-place by Age:
       Name  Age  Salary
2  Charlie   22   45000
0    Alice   25   50000
1      Bob   30   60000
3    David   35   70000


In [None]:
# Writing Pandas Dataframes to Files

In [15]:
import pandas as pd

# Sample DataFrame
data = {'Name': ['John', 'Alice', 'Bob'],
        'Age': [28, 24, 22],
        'City': ['New York', 'San Francisco', 'Seattle']}
df = pd.DataFrame(data)

# Write to CSV
df.to_csv('output.csv', index=False)
print("CSV file written successfully.")

# Write to JSON
df.to_json('output.json', orient='records')
print("JSON file written successfully.")

CSV file written successfully.
JSON file written successfully.


In [None]:
# Write Pandas Dataframes to JSON Files

In [16]:
import pandas as pd

# Creating a sample DataFrame
data = {'Name': ['Alice', 'Bob', 'Charlie'],
        'Age': [25, 30, 35],
        'City': ['New York', 'San Francisco', 'Los Angeles']}
df = pd.DataFrame(data)

# Specify the file path where you want to save the JSON file
json_file_path = 'output.json'

# Writing DataFrame to JSON file
df.to_json(json_file_path, orient='records', lines=True)

print(f'DataFrame has been written to {json_file_path}')


DataFrame has been written to output.json
