# **Fix distribution strategy with Replicated Table**

This notebook:

1. creates tables with replicate distribution
2. shows replication cache concept
3. shows execution plans when joining with replicate tables
4. (to-do) checks, on a pool with 2 compute nodes, whether the total row counts on node 1 and node 2 are identical, i.e. whether only the loading into the distributions is random

DMVs used in this notebook:

- [pdw\_replicated\_table\_cache\_state](https://learn.microsoft.com/en-us/sql/relational-databases/system-catalog-views/sys-pdw-replicated-table-cache-state-transact-sql?view=azure-sqldw-latest&WT.mc_id=DP-MVP-5004236): Returns the state of the cache associated with a replicated table by object\_id

# Query 1

Creates two replicated copies of the Geography table and inserts the data from the Geography table into both.

In [None]:
-- Query 1
-- Builds two replicated tables with the same column layout and fills both
-- from dbo.Geography.

-- First replicated copy: joined to dbo.Trip by later queries in this notebook.
CREATE TABLE [dbo].[Geography_replicated]
(
    [GeographyID]   [int]         NOT NULL,
    [ZipCodeBKey]   [varchar](10) NOT NULL,
    [County]        [varchar](50) NULL,
    [City]          [varchar](50) NULL,
    [State]         [varchar](50) NULL,
    [Country]       [varchar](50) NULL,
    [ZipCode]       [varchar](50) NULL
)
WITH
(
    DISTRIBUTION = REPLICATE,
    CLUSTERED COLUMNSTORE INDEX
)
GO

-- Second replicated copy with identical definition.
-- NOTE(review): presumably left unqueried so it can serve as a baseline in the
-- space-used comparison of Query 5 - confirm.
CREATE TABLE [dbo].[Geography_replicated_for_checks]
(
    [GeographyID]   [int]         NOT NULL,
    [ZipCodeBKey]   [varchar](10) NOT NULL,
    [County]        [varchar](50) NULL,
    [City]          [varchar](50) NULL,
    [State]         [varchar](50) NULL,
    [Country]       [varchar](50) NULL,
    [ZipCode]       [varchar](50) NULL
)
WITH
(
    DISTRIBUTION = REPLICATE,
    CLUSTERED COLUMNSTORE INDEX
)
GO

-- Load both copies from the source table.
-- NOTE(review): SELECT * without an INSERT column list relies on dbo.Geography
-- exposing its columns in the same order as the targets - verify against the
-- source table definition.
INSERT INTO dbo.Geography_replicated
SELECT *
FROM dbo.Geography;

INSERT INTO dbo.Geography_replicated_for_checks
SELECT *
FROM dbo.Geography;

# Query 2

## <span style="font-size: 14px;">Lists tables, their distribution policy and their row counts per distribution</span>

In [None]:
-- Query 2
-- Lists every table with its distribution policy and row-count statistics:
-- total rows plus max / min / average rows per physical (node-level) table.
--
-- sys.dm_pdw_nodes_db_partition_stats holds one row per index and per
-- partition, so the rows are first summed per physical table:
--   * only the heap / clustered index (index_id 0 or 1) is counted, otherwise
--     every secondary index would add the table's rows a second time;
--   * partitions are collapsed, so the *_per_distribution columns really are
--     per distribution and not per partition.
-- For unpartitioned tables without secondary indexes the output is unchanged.
with distribution_rows as
(
    select
        [schema_name] = s.name
        ,table_name = t.name
        ,distribution_policy = tp.distribution_policy_desc
        ,distribution_row_count = sum(nps.row_count)
    from sys.schemas as s
        inner join sys.tables as t on s.schema_id = t.schema_id
        inner join sys.pdw_table_distribution_properties as tp on t.object_id = tp.object_id
        inner join sys.pdw_table_mappings as tm on t.object_id = tm.object_id
        inner join sys.pdw_nodes_tables as nt on tm.physical_name = nt.name
        inner join sys.dm_pdw_nodes_db_partition_stats as nps on nt.object_id = nps.object_id
            and nt.pdw_node_id = nps.pdw_node_id
            and nt.distribution_id = nps.distribution_id
    where nps.index_id in (0, 1)
    group by
        s.name
        ,t.name
        ,tp.distribution_policy_desc
        -- the three columns below identify one physical table on one node
        ,nt.pdw_node_id
        ,nt.distribution_id
        ,nt.object_id
)
select
    [schema_name]
    ,table_name
    ,distribution_policy
    ,row_count = sum(distribution_row_count)
    ,max_row_count_per_distribution = max(distribution_row_count)
    ,min_row_count_per_distribution = min(distribution_row_count)
    ,avg_row_count_per_distribution = avg(distribution_row_count)
from distribution_rows
group by
    [schema_name]
    ,table_name
    ,distribution_policy;

# Query 3

Shows the execution plan of a round-robin (RR) table joined to a replicated table

In [None]:
-- Query 3
-- Joins dbo.Trip (round-robin, according to the label) to the replicated
-- Geography table, tagging the request with a label so it can be found again.
select top(100000) *
from dbo.Trip as t
    inner join dbo.Geography_replicated as g on t.PickupGeographyID = g.GeographyID
option (label = 'SELECT FROM ROUND_ROBIN JOIN replicated');

-- Shows the plan steps of the most recently started request carrying that
-- label. Distinct aliases avoid shadowing between the outer query and the
-- subquery, and the steps are returned in execution order.
select steps.*
from sys.dm_pdw_request_steps as steps
where steps.request_id =
(
    select top(1) req.request_id
    from sys.dm_pdw_exec_requests as req
    where req.[label] = 'SELECT FROM ROUND_ROBIN JOIN replicated'
    order by req.start_time desc
)
order by steps.step_index;

# Query 4

Shows replication status for replicated table

In [None]:
-- Query 4
-- One row per replicated table: qualified name, distribution policy and the
-- current state of its replicated-table cache.
select
    sch.[name] + '.' + tbl.[name] as table_name
    ,props.distribution_policy_desc
    ,cache.[state]
from sys.pdw_replicated_table_cache_state as cache
    inner join sys.tables as tbl on tbl.object_id = cache.object_id
    inner join sys.schemas as sch on sch.schema_id = tbl.schema_id
    inner join sys.pdw_table_distribution_properties as props on props.object_id = tbl.object_id
where props.distribution_policy_desc = 'replicate'
order by
    cache.[state] desc
    ,table_name;

# Query 5

Shows the replication cache

In [None]:
-- Query 5
-- Space used by the replicated Geography table, first as reported by default ...
DBCC PDW_SHOWSPACEUSED('dbo.Geography_replicated');
-- ... then with the replicated table cache left out of the figures ...
DBCC PDW_SHOWSPACEUSED('dbo.Geography_replicated') WITH IGNORE_REPLICATED_TABLE_CACHE;
-- ... and finally for the second copy, for comparison.
DBCC PDW_SHOWSPACEUSED('dbo.Geography_replicated_for_checks');

# Query 6

Shows table replication processes

In [None]:
-- Query 6
-- Requests whose command text mentions BuildReplicatedTableCache, newest first.
-- Commands containing "select" are filtered out.
-- NOTE(review): presumably so that this monitoring query does not list itself - confirm.
select
    req.[status]
    ,req.command
    ,req.submit_time
    ,req.start_time
    ,req.end_time
    ,req.total_elapsed_time
from sys.dm_pdw_exec_requests as req
where req.command like '%buildreplicatedtablecache%'
    and req.command not like '%select%'
order by req.submit_time desc;

# Query 7

Replicate HackneyLicense

In [None]:
-- Query 7
-- Replicated copy of the HackneyLicense table, stored as clustered columnstore.
CREATE TABLE [dbo].[HackneyLicense_replicated]
(
    [HackneyLicenseID]   [int]         NOT NULL,
    [HackneyLicenseBKey] [varchar](50) NOT NULL,
    [HackneyLicenseCode] [varchar](50) NULL
)
WITH
(
    DISTRIBUTION = REPLICATE,
    CLUSTERED COLUMNSTORE INDEX
)
GO

-- Fill the copy from the source table.
-- NOTE(review): SELECT * without an INSERT column list relies on
-- dbo.HackneyLicense exposing its columns in the same order as the target -
-- verify against the source table definition.
INSERT INTO dbo.HackneyLicense_replicated
SELECT *
FROM dbo.HackneyLicense;

# Query 8

Shows cache replication process

In [None]:
-- Query 8
-- Cache state of every replicated table before HackneyLicense_replicated is read.
select
    sch.[name] + '.' + tbl.[name] as table_name
    ,props.distribution_policy_desc
    ,cache.[state]
from sys.pdw_replicated_table_cache_state as cache
    inner join sys.tables as tbl on tbl.object_id = cache.object_id
    inner join sys.schemas as sch on sch.schema_id = tbl.schema_id
    inner join sys.pdw_table_distribution_properties as props on props.object_id = tbl.object_id
where props.distribution_policy_desc = 'replicate'
order by
    cache.[state] desc
    ,table_name;

-- Zero-row read against the new replicated table.
-- NOTE(review): presumably issued only to trigger the cache build - confirm.
select top(0) 1
from dbo.HackneyLicense_replicated;

In [None]:
-- Cache state of every replicated table, re-read after the previous cell.
select
    sch.[name] + '.' + tbl.[name] as table_name
    ,props.distribution_policy_desc
    ,cache.[state]
from sys.pdw_replicated_table_cache_state as cache
    inner join sys.tables as tbl on tbl.object_id = cache.object_id
    inner join sys.schemas as sch on sch.schema_id = tbl.schema_id
    inner join sys.pdw_table_distribution_properties as props on props.object_id = tbl.object_id
where props.distribution_policy_desc = 'replicate'
order by
    cache.[state] desc
    ,table_name;

# Query 9

Shows when cache is invalidated

In [None]:
-- Query 9
-- UPDATE whose predicate is never true, so no row is actually modified.
-- NOTE(review): presumably meant to show that even a no-op DML statement
-- changes the cache state reported below - confirm on a live pool.
update dbo.Geography_replicated
set city = 'data changed'
where 1 = 0;

-- Cache state of every replicated table right after the UPDATE.
select
    sch.[name] + '.' + tbl.[name] as table_name
    ,props.distribution_policy_desc
    ,cache.[state]
from sys.pdw_replicated_table_cache_state as cache
    inner join sys.tables as tbl on tbl.object_id = cache.object_id
    inner join sys.schemas as sch on sch.schema_id = tbl.schema_id
    inner join sys.pdw_table_distribution_properties as props on props.object_id = tbl.object_id
where props.distribution_policy_desc = 'replicate'
order by
    cache.[state] desc
    ,table_name;

# Query 10

Warm up cache

In [None]:
-- Query 10
-- Zero-row read against the replicated Geography table, used as the cache warm-up.
select top(0) 1
from dbo.Geography_replicated;

-- Cache state of every replicated table after the warm-up read.
select
    sch.[name] + '.' + tbl.[name] as table_name
    ,props.distribution_policy_desc
    ,cache.[state]
from sys.pdw_replicated_table_cache_state as cache
    inner join sys.tables as tbl on tbl.object_id = cache.object_id
    inner join sys.schemas as sch on sch.schema_id = tbl.schema_id
    inner join sys.pdw_table_distribution_properties as props on props.object_id = tbl.object_id
where props.distribution_policy_desc = 'replicate'
order by
    cache.[state] desc
    ,table_name;

# Query 11

Show plan when joining with replicated tables

In [None]:
-- Query 11
-- Joins dbo.Trip (round-robin, according to the label) to both replicated
-- tables, tagging the request with a label so it can be found again.
select top(100000) *
from dbo.Trip as t
    inner join dbo.Geography_replicated as g on t.PickupGeographyID = g.GeographyID
    inner join dbo.HackneyLicense_replicated as h on t.HackneyLicenseID = h.HackneyLicenseID
option (label = 'SELECT FROM ROUND_ROBIN JOIN REPLICATEDx2');

-- Shows the plan steps of the most recently started request carrying that
-- label. Distinct aliases avoid shadowing between the outer query and the
-- subquery, and the steps are returned in execution order.
select steps.*
from sys.dm_pdw_request_steps as steps
where steps.request_id =
(
    select top(1) req.request_id
    from sys.dm_pdw_exec_requests as req
    where req.[label] = 'SELECT FROM ROUND_ROBIN JOIN REPLICATEDx2'
    order by req.start_time desc
)
order by steps.step_index;