#### Import required package

In [None]:
import pandas as pd
pd.set_option("display.max_column", None)


#### Read the csv file data sample

In [2]:
df = pd.read_csv("../../../production_data/raw_data_dump/companies.csv")
df.sample(2)


Unnamed: 0,_id,name,organization_size,company_type,created_at,updated_at,is_deleted
5,6822e3346342bdaa117239ba,Singapore Internation School,200+,corporation,2025-05-13 06:14:12.395,2025-05-13 06:14:12.395,False
36,685bf70f4b21f9b04f264b63,XYZ solutions,1-10,startup,2025-06-25 13:18:07.428,2025-06-25 13:18:07.428,False


#### Inspect the shape and columns

In [3]:
print("shape: ", df.shape)
print("Columns: ", df.columns)


shape:  (38, 7)
Columns:  Index(['_id', 'name', 'organization_size', 'company_type', 'created_at',
       'updated_at', 'is_deleted'],
      dtype='object')


#### Inspect detail information about the data

In [4]:
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 38 entries, 0 to 37
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   _id                38 non-null     object
 1   name               38 non-null     object
 2   organization_size  38 non-null     object
 3   company_type       38 non-null     object
 4   created_at         38 non-null     object
 5   updated_at         38 non-null     object
 6   is_deleted         38 non-null     bool  
dtypes: bool(1), object(6)
memory usage: 1.9+ KB


#### value counts for separate column's values

In [5]:
df['is_deleted'].value_counts()


is_deleted
False    38
Name: count, dtype: int64

In [6]:
df['organization_size'].value_counts()


organization_size
200+      11
51-200    10
1-10       9
11-50      8
Name: count, dtype: int64

In [7]:
df['company_type'].value_counts()


company_type
corporation       10
startup            8
enterprise         6
manufacturing      5
small-business     4
ecommerce          2
consulting         1
freelance          1
non-profit         1
Name: count, dtype: int64

#### Find out unique name counts

In [8]:
df[['name']].count()


name    38
dtype: int64

#### View the unique company names

In [9]:
df[['name']]


Unnamed: 0,name
0,Klizo Solutions
1,Vietnam Australia International School
2,Esco Beach
3,Furarma Resort Danang
4,Furama Resort Da Nang
5,Singapore Internation School
6,Production Company
7,Demo Company
8,QA_Company
9,Bunny House


#### Filter out the required data

##### Filter company name based on organization size condition

In [10]:
filtered_df = df[df['organization_size'] == '11-50'][['name']]
filtered_df


Unnamed: 0,name
0,Klizo Solutions
7,Demo Company
16,Demo Company 2
21,Ad officia sed velit
23,New Company Names
30,Veniam at illum vo
34,ABC Company
35,Abcd Company


##### Filter company name and company type based on organization size condition

In [11]:
filtered_df = df[df['organization_size'] == '11-50'][['name', 'company_type']]
filtered_df


Unnamed: 0,name,company_type
0,Klizo Solutions,corporation
7,Demo Company,small-business
16,Demo Company 2,corporation
21,Ad officia sed velit,corporation
23,New Company Names,enterprise
30,Veniam at illum vo,manufacturing
34,ABC Company,corporation
35,Abcd Company,enterprise


#### Sort the Company table based on company name

SELECT organization_size, name FROM df GROUP BY organization_size ORDER BY name DESC


In [12]:
df_sorted = df.sort_values(by='name', ascending=False)
df_sorted


Unnamed: 0,_id,name,organization_size,company_type,created_at,updated_at,is_deleted
31,68513789c939b52e8b71ad05,internal-company,1-10,small-business,2025-06-17 09:38:17.624,2025-06-17 09:38:17.624,False
36,685bf70f4b21f9b04f264b63,XYZ solutions,1-10,startup,2025-06-25 13:18:07.428,2025-06-25 13:18:07.428,False
1,681c86b51cf44d32fe030963,Vietnam Australia International School,200+,corporation,2025-05-08 10:25:57.981,2025-05-08 10:25:57.981,False
30,684ea957c939b52e8b71ac3c,Veniam at illum vo,11-50,manufacturing,2025-06-15 11:07:03.616,2025-06-15 11:07:03.616,False
37,685bfeeb4b21f9b04f264b68,Unde odit adipisicin,1-10,startup,2025-06-25 13:51:39.126,2025-06-25 13:51:39.126,False
24,6847f945eabd1dae68df0873,Testuikji87k6j7i,200+,enterprise,2025-06-10 09:22:13.284,2025-06-10 09:22:13.284,False
28,684bb3a5c939b52e8b71abaa,Testing,200+,manufacturing,2025-06-13 05:14:13.535,2025-06-13 05:14:13.535,False
27,684af483c939b52e8b71ab72,Test Company,200+,manufacturing,2025-06-12 15:38:43.959,2025-06-12 15:38:43.959,False
14,683eec13b72469b8444e4317,Test,200+,manufacturing,2025-06-03 12:35:31.377,2025-06-03 12:35:31.377,False
5,6822e3346342bdaa117239ba,Singapore Internation School,200+,corporation,2025-05-13 06:14:12.395,2025-05-13 06:14:12.395,False


#### The sorted Company table group by organization size and find the top one company details

In [13]:
df_grouped = df_sorted.groupby('organization_size').first().reset_index()
df_grouped


Unnamed: 0,organization_size,_id,name,company_type,created_at,updated_at,is_deleted
0,1-10,68513789c939b52e8b71ad05,internal-company,small-business,2025-06-17 09:38:17.624,2025-06-17 09:38:17.624,False
1,11-50,684ea957c939b52e8b71ac3c,Veniam at illum vo,manufacturing,2025-06-15 11:07:03.616,2025-06-15 11:07:03.616,False
2,200+,681c86b51cf44d32fe030963,Vietnam Australia International School,corporation,2025-05-08 10:25:57.981,2025-05-08 10:25:57.981,False
3,51-200,6840858aa060d5257c9c3d6d,Quality Analyst Company,startup,2025-06-04 17:42:34.068,2025-06-04 17:42:34.068,False


#### The sorted Company table group by organization size and find the top one company name and organization size

In [14]:
df_result = df_grouped[['organization_size', 'name']]
df_result


Unnamed: 0,organization_size,name
0,1-10,internal-company
1,11-50,Veniam at illum vo
2,200+,Vietnam Australia International School
3,51-200,Quality Analyst Company


#### Find out the company name and organization size sorted by organization size

In [15]:
df_result = df[['organization_size', 'name']].sort_values(by='organization_size', ascending=False)
df_result


Unnamed: 0,organization_size,name
11,51-200,ESCO Beach Vietnam
13,51-200,F comany
2,51-200,Esco Beach
25,51-200,Klizo Solutions2
18,51-200,In magna dolore temp
6,51-200,Production Company
17,51-200,Quality Analyst Company
8,51-200,QA_Company
33,51-200,Interview Screener_Klizo Solution
10,51-200,Klizo QA
