# Pandas DataFrame
A Pandas DataFrame is a two-dimensional data structure, like a two-dimensional array or a table with rows and columns.

In [1]:
# Build a small DataFrame from a mapping of column name -> list of values.
import pandas as pd

data = dict(
    calories=[420, 380, 390],
    duration=[50, 40, 45],
)

# Every key of the mapping becomes one column of the DataFrame.
df = pd.DataFrame(data=data)
print(df)

   calories  duration
0       420        50
1       380        40
2       390        45


In [3]:
# Column-oriented input: keys are the column names, values are the column contents.
import pandas as pd

data = {
    "Name": ["Alice", "Bob", "Charlie"],
    "Age": [25, 30, 35],
    "City": ["New York", "Los Angeles", "Chicago"],
}

df = pd.DataFrame(data=data)
print(df)

      Name  Age         City
0    Alice   25     New York
1      Bob   30  Los Angeles
2  Charlie   35      Chicago


In [4]:
# Row-oriented input: each inner list is one record, so the column
# names have to be passed separately through `columns`.
import pandas as pd

data = [
    ["Alice", 25, "New York"],
    ["Bob", 30, "Los Angeles"],
    ["Charlie", 35, "Chicago"],
]

df = pd.DataFrame(columns=["Name", "Age", "City"], data=data)
print(df)

      Name  Age         City
0    Alice   25     New York
1      Bob   30  Los Angeles
2  Charlie   35      Chicago


In [6]:
# Wrap a 3x3 NumPy array in a DataFrame with named columns.
import numpy as np
import pandas as pd

# The integers 1..9 laid out row by row in a 3x3 array.
data = np.arange(1, 10).reshape(3, 3)

df = pd.DataFrame(data, columns=list("ABC"))
print(df)

   A  B  C
0  1  2  3
1  4  5  6
2  7  8  9


# Reading a CSV file into a DataFrame

* A simple way to store big data sets is to use CSV (comma-separated values) files.  
* In our examples we will be using a CSV file called 'data.csv'.  
* Download data.csv or open data.csv.
* Syntax:   
  `pd.read_csv(filepath_or_buffer, sep=',', header='infer', index_col=None, usecols=None, engine=None, skiprows=None, nrows=None)`

In [13]:
# Read a CSV file from disk into a DataFrame and print it as text.
import pandas as pd

# NOTE(review): absolute, machine-specific path; this cell only runs on a
# machine where that file exists. Consider a path relative to the notebook.
df = pd.read_csv(filepath_or_buffer='C:/Users/PJ-COMPUTERS/Desktop/num.csv')

# to_string() renders the DataFrame as a single string before printing.
print(df.to_string())

            name  energy (kcal/kJ)  water (g)  protein (g)
0         Apple                 48      86.70         0.27
1       Apricot                 48      86.40         1.40
2       Avocado                160      73.23         2.00
3        Banana                 89      74.91         1.09
4  Blackberries                 43      88.15         1.39


# Reading an Excel (xlsx) file into a DataFrame  
* A simple way to store big data sets is to use xlsx files.  
* Save the file in xlsx format.

In [3]:
# Read an Excel workbook from disk into a DataFrame and print it.
import pandas as pd

# NOTE(review): absolute, machine-specific path; this cell only runs on a
# machine where that file exists. Consider a path relative to the notebook.
df_excel = pd.read_excel(io='C:/Users/PJ-COMPUTERS/Desktop/num1.xlsx')
print(df_excel)

            name  energy (kcal/kJ)  water (g)  protein (g)
0         Apple                 48      86.70         0.27
1       Apricot                 48      86.40         1.40
2       Avocado                160      73.23         2.00
3        Banana                 89      74.91         1.09
4  Blackberries                 43      88.15         1.39


# Create a DataFrame from a matrix

In [14]:
import numpy as np
import pandas as pd

# 3x3 integer matrix holding 1..9, filled row by row.
matrix = np.reshape(np.arange(1, 10), (3, 3))

# No labels are passed here, so the DataFrame gets default integer
# labels for both its rows and its columns.
df = pd.DataFrame(data=matrix)

# Show the result.
print(df)

   0  1  2
0  1  2  3
1  4  5  6
2  7  8  9


In [15]:
# Wrap the existing `matrix` again, this time with explicit labels:
# columns are named A-C and rows are named X-Z.
df = pd.DataFrame(
    matrix,
    index=['X', 'Y', 'Z'],
    columns=['A', 'B', 'C'],
)
print(df)

   A  B  C
X  1  2  3
Y  4  5  6
Z  7  8  9


# Create a DataFrame from a dictionary

In [16]:
import pandas as pd

# Mapping of column name -> column values; every list has one entry per row.
data = dict(
    Name=['Alice', 'Bob', 'Charlie'],
    Age=[25, 30, 35],
    City=['New York', 'Los Angeles', 'Chicago'],
)

df = pd.DataFrame(data=data)
print(df)

      Name  Age         City
0    Alice   25     New York
1      Bob   30  Los Angeles
2  Charlie   35      Chicago


In [18]:
# Build a DataFrame from the existing `data` dict again:
# `columns` sets the column order and `index` supplies the row labels.
df = pd.DataFrame(
    data,
    columns=['Age', 'Name', 'City'],
    index=['A', 'B', 'C'],
)
print(df)

   Age     Name         City
A   25    Alice     New York
B   30      Bob  Los Angeles
C   35  Charlie      Chicago


# Other ways to read files

* Reading a CSV file into a DataFrame  
df_csv = pd.read_csv('data.csv')  

* Reading an Excel file into a DataFrame  
df_excel = pd.read_excel('data.xlsx', sheet_name='Sheet1')  

* Reading a JSON file into a DataFrame  
df_json = pd.read_json('data.json')  

* Reading tables from an HTML file into a list of DataFrames   
dfs_html = pd.read_html('data.html')  